Commit d51e19d (verified), committed by gospacedev
Parent(s): 25f1135

Create app.py

Files changed (1):
  app.py +84 -0
app.py ADDED
import torch
import spaces  # provides the ZeroGPU decorator used on transcribe() below
import numpy as np
import gradio as gr
from gtts import gTTS
from transformers import pipeline
from huggingface_hub import InferenceClient


ASR_MODEL_NAME = "openai/whisper-small"
NLP_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# Mistral-Instruct format: instructions go inside [INST] ... [/INST] tags.
system_prompt = """<s> [INST] You are Friday, a helpful and conversational assistant. [/INST]"""

client = InferenceClient(NLP_MODEL_NAME)

# Run Whisper on the first GPU when available, otherwise fall back to CPU.
device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL_NAME,
    device=device,
)


def generate(prompt, temperature=0.1, max_new_tokens=64, top_p=0.95, repetition_penalty=1.0):
    # Clamp temperature away from zero; the Inference API rejects non-positive values.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # Append the user turn and close the sequence with </s>.
    formatted_prompt = system_prompt + f""" {prompt} </s>"""

    output = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=False, details=False, return_full_text=False)

    print(output)
    return output


@spaces.GPU(duration=60)
def transcribe(audio):
    # Gradio's microphone input arrives as (sample_rate, numpy_array).
    sr, y = audio
    y = y.astype(np.float32)
    # Normalize to [-1, 1]; guard against a silent (all-zero) recording.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    inputs = pipe({"sampling_rate": sr, "raw": y})["text"]

    print("User transcription: ", inputs)

    # Generate a reply, synthesize it with gTTS, and return the saved file path.
    response = generate(inputs)
    audio_response = gTTS(response)
    audio_response.save("response.mp3")

    return "response.mp3"


with gr.Blocks() as demo:
    gr.HTML("<center><h1>Friday: AI Virtual Assistant</h1></center>")

    with gr.Row():
        audio_input = gr.Audio(label="Human", sources="microphone")
        output_audio = gr.Audio(label="Friday", type="filepath",
                                interactive=False,
                                autoplay=True,
                                elem_classes="audio")

    transcribe_btn = gr.Button("Transcribe")
    transcribe_btn.click(fn=transcribe, inputs=audio_input,
                         outputs=output_audio)


demo.queue()
demo.launch()
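
A quick way to sanity-check the two model calls outside the Space, assuming network access to the Hugging Face Inference API, a token with access to the Mistral model if one is required, and the dependencies imported above installed (all assumptions, not part of this commit):

# Hypothetical standalone smoke test mirroring the two model calls in app.py.
import numpy as np
from transformers import pipeline
from huggingface_hub import InferenceClient

# Text-generation round trip, using the same prompt format as app.py.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
print(client.text_generation(
    "<s> [INST] You are Friday, a helpful and conversational assistant. [/INST] Hello </s>",
    max_new_tokens=64, return_full_text=False))

# One second of silence through the Whisper pipeline, just to confirm it loads.
asr = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")
print(asr({"sampling_rate": 16000, "raw": np.zeros(16000, dtype=np.float32)})["text"])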