AI-Edify committed (verified)
Commit 15bf7ba · Parent: 7a50665

Update app.py

Files changed (1):
  1. app.py (+67, -39)
app.py CHANGED

@@ -1,29 +1,21 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
+import difflib
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
+# Load Hugging Face Inference client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+# Load the speech-to-text model from Hugging Face
+s2t = gr.Interface.load('huggingface/facebook/s2t-medium-librispeech-asr')
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
+
+def generate_text_with_huggingface(system_message, max_tokens, temperature, top_p):
+    """
+    Function to generate text using Hugging Face Inference API
+    based on the system message, max tokens, temperature, and top-p.
+    """
+    messages = [{"role": "system", "content": system_message}]
+    message = ""
 
     response = ""
 
@@ -35,29 +27,65 @@ def respond(
         top_p=top_p,
     ):
         token = message.choices[0].delta.content
-
         response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+
+    return response.strip()  # Return the generated text
+
+
+def pronunciation_feedback(transcription, reference_text):
+    """
+    Function to provide feedback on pronunciation based on differences
+    between the transcription and the reference (expected) text.
+    """
+    diff = difflib.ndiff(reference_text.split(), transcription.split())
+    # Identify words that are incorrect or missing in the transcription
+    errors = [word for word in diff if word.startswith('- ')]
+
+    if errors:
+        feedback = "Mispronounced words: " + ', '.join([error[2:] for error in errors])
+    else:
+        feedback = "Great job! Your pronunciation is spot on."
+
+    return feedback
+
+
+def transcribe_and_feedback(audio, system_message, max_tokens, temperature, top_p):
+    """
+    Transcribe the audio and provide pronunciation feedback using the generated text.
+    """
+    # Generate the reference text using Hugging Face Inference API
+    reference_text = generate_text_with_huggingface(system_message, max_tokens, temperature, top_p)
+
+    # Transcribe the audio using the speech-to-text model
+    transcription = s2t(audio)
+
+    # Provide pronunciation feedback based on the transcription and the generated text
+    feedback = pronunciation_feedback(transcription, reference_text)
+
+    return transcription, feedback, reference_text
+
+
+# Gradio interface
+demo = gr.Interface(
+    fn=transcribe_and_feedback,  # The function that transcribes audio and provides feedback
+    inputs=[
+        gr.Audio(source="microphone", type="filepath", label="Record Audio"),  # Microphone input for recording
+        gr.Textbox(value="Please read a simple sentence.", label="System message"),  # Message used to generate text
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),  # Controls max token length for the generated text
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),  # Temperature control for text generation
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")  # Top-p control for text generation
+    ],
+    outputs=[
+        gr.Textbox(label="Transcription"),  # Display transcription of the audio
+        gr.Textbox(label="Pronunciation Feedback"),  # Feedback on pronunciation
+        gr.Textbox(label="Generated Text (What You Were Supposed to Read)")  # Display the text generated by the API
     ],
+    title="Speech-to-Text with Pronunciation Feedback",
+    description="Record an audio sample and the system will transcribe it, "
+                "compare your transcription to the generated text, and give pronunciation feedback.",
+    live=True  # Real-time interaction
 )
 
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(enable_queue=True, show_error=True)
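A review note on the generation loop the diff keeps from the chat template: with OpenAI-style streaming, the final chunk's `delta.content` can be `None`, in which case `response += token` raises a `TypeError`. A minimal guard, assuming the template's surrounding `client.chat_completion(..., stream=True)` call; renaming the loop variable to `chunk` also stops it shadowing the `message` placeholder defined earlier in the function:

    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final streamed chunk may carry None instead of text
            response += token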
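The feedback helper is easy to sanity-check in isolation: `difflib.ndiff` prefixes items that appear only in the first sequence (here, the reference text) with '- ', and those are exactly the words the app reports. A self-contained run of the committed logic:

    import difflib

    def pronunciation_feedback(transcription, reference_text):
        diff = difflib.ndiff(reference_text.split(), transcription.split())
        # '- ' marks reference words missing or altered in the transcription
        errors = [word[2:] for word in diff if word.startswith('- ')]
        if errors:
            return "Mispronounced words: " + ', '.join(errors)
        return "Great job! Your pronunciation is spot on."

    print(pronunciation_feedback("the quick brown focks", "the quick brown fox"))
    # -> Mispronounced words: fox

Because the match is exact, punctuation and capitalization differences between the generated sentence and the ASR output get flagged as mispronunciations too; lowercasing and stripping punctuation on both sides before diffing would cut those false positives.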
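A design note on the wiring: with `live=True`, `transcribe_and_feedback` re-runs whenever any input changes, and every run calls the LLM again for a fresh reference sentence, so the learner can end up scored against a different sentence than the one displayed before recording. Generating the sentence once (for example behind a button, held in `gr.State`) would keep the reference stable between recording and scoring.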
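Compatibility note: several of these calls are Gradio 3.x idioms that later releases renamed or removed, so this commit assumes a pinned older `gradio`. On Gradio 4.x the rough equivalents are the following (a sketch, not tested against this Space):

    import gradio as gr

    # gr.Interface.load(...) was deprecated in favor of gr.load(...);
    # "models/<org>/<repo>" is the documented name format for a hosted model
    s2t = gr.load("models/facebook/s2t-medium-librispeech-asr")

    # gr.Audio's `source=` parameter became `sources=[...]`
    audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Audio")

    # launch(enable_queue=True) was removed; queueing is configured on the app object:
    # demo.queue().launch(show_error=True)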