jujutech committed
Commit 7634353 · verified · 1 Parent(s): 5eaa950

Update app.py

Files changed (1):
  1. app.py +20 -37
app.py CHANGED
@@ -1,42 +1,27 @@
 import gradio as gr
-from gradio_client import Client
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+import torch
+import librosa
 
-def get_speech(text, voice):
-    try:
-        client = Client("https://collabora-whisperspeech.hf.space/")
-        result = client.predict(
-            text,  # str in 'Enter multilingual text💬📝' Textbox component
-            voice,  # filepath in 'Upload or Record Speaker Audio (optional)🌬️💬' Audio component
-            "",  # str in 'alternatively, you can paste in an audio file URL:' Textbox component
-            14,  # float (numeric value between 10 and 15) in 'Tempo (in characters per second)' Slider component
-            api_name="/whisper_speech_demo"
-        )
-        print(result)
-        return result
-    except Exception as e:
-        raise gr.Error(f"Error in get_speech: {str(e)}")
+# Load the model and processor
+processor = Wav2Vec2Processor.from_pretrained("SpeechResearch/whisper-ft-normal")
+model = Wav2Vec2ForCTC.from_pretrained("SpeechResearch/whisper-ft-normal")
 
-def get_dreamtalk(image_in, speech):
-    try:
-        client = Client("https://fffiloni-dreamtalk.hf.space/")
-        result = client.predict(
-            speech,  # filepath in 'Audio input' Audio component
-            image_in,  # filepath in 'Image' Image component
-            "M030_front_neutral_level1_001.mat",  # Literal in 'emotional style' Dropdown component
-            api_name="/infer"
-        )
-        print(result)
-        return result['video']
-    except Exception as e:
-        raise gr.Error(f"Error in get_dreamtalk: {str(e)}")
+def transcribe_speech(audio_path):
+    speech, _ = librosa.load(audio_path, sr=16000)
+    input_values = processor(speech, return_tensors="pt", padding="longest").input_values
+    with torch.no_grad():
+        logits = model(input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)
+    return transcription[0]
 
 def pipe(text, voice, image_in):
-    try:
-        speech = get_speech(text, voice)
-        video = get_dreamtalk(image_in, speech)
-        return video
-    except Exception as e:
-        raise gr.Error(f"An error occurred while processing: {str(e)}")
+    # Assuming voice is a file path to the audio file
+    transcription = transcribe_speech(voice)
+    # Now use this transcription with your get_dreamtalk function
+    video = get_dreamtalk(image_in, transcription)
+    return video
 
 with gr.Blocks() as demo:
     with gr.Column():
@@ -44,11 +29,9 @@ with gr.Blocks() as demo:
         <h1 style="text-align: center;">
            Talking Image
         </h1>
-        <p style="text-align: center;"></p>
         <h3 style="text-align: center;">
            Clone your voice and make your photos speak.
         </h3>
-        <p style="text-align: center;"></p>
         """)
     with gr.Row():
         with gr.Column():
@@ -65,4 +48,4 @@ with gr.Blocks() as demo:
        outputs=[video_o],
        concurrency_limit=3
    )
-demo.queue(max_size=10).launch(show_error=True, show_api=False)
+demo.queue(max_size=10).launch(show_error=True, show_api=False)
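
Note: the new code loads a checkpoint named "SpeechResearch/whisper-ft-normal" through the wav2vec2/CTC classes. Whisper checkpoints are encoder-decoder models and load through the seq2seq classes instead, so if this model really is a Whisper fine-tune (the name suggests so, but its actual architecture is an assumption here), loading it via Wav2Vec2ForCTC would likely fail or produce garbage. A minimal sketch of Whisper-style loading under that assumption:

# A sketch assuming the checkpoint is a Whisper fine-tune; if it is actually
# a wav2vec2 model, the committed code is fine and this does not apply.
import torch
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("SpeechResearch/whisper-ft-normal")
model = WhisperForConditionalGeneration.from_pretrained("SpeechResearch/whisper-ft-normal")

def transcribe_speech(audio_path):
    # Whisper expects 16 kHz mono audio converted to log-mel input features
    speech, _ = librosa.load(audio_path, sr=16000)
    inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model.generate(inputs.input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]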
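
Note also that this commit deletes get_dreamtalk while the new pipe() still calls it, and it now passes the text transcription where the old helper expected an audio filepath; as committed, clicking Submit would raise a NameError at runtime. A minimal sketch restoring the helper from the deleted lines above:

# Restores the remote DreamTalk call this commit removes, so pipe() resolves.
# The second argument must be an audio filepath, not a text transcription.
from gradio_client import Client

def get_dreamtalk(image_in, speech):
    client = Client("https://fffiloni-dreamtalk.hf.space/")
    result = client.predict(
        speech,      # filepath in 'Audio input' Audio component
        image_in,    # filepath in 'Image' Image component
        "M030_front_neutral_level1_001.mat",  # 'emotional style' Dropdown component
        api_name="/infer"
    )
    return result['video']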