gospacedev committed on
Commit
3721320
·
verified ·
1 Parent(s): b4920d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -11
app.py CHANGED
@@ -6,17 +6,24 @@ from gtts import gTTS
6
  from transformers import pipeline
7
  from huggingface_hub import InferenceClient
8
 
 
9
  ASR_MODEL_NAME = "openai/whisper-small"
10
  LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
11
 
 
12
  system_prompt = """"<s>[INST] You are Friday, a helpful and conversational AI assistant, and you respond with one to two sentences. [/INST] Hello there! I'm Friday, how can I help you?</s>"""
13
 
14
- instruct_history = system_prompt + """"""
 
 
15
 
 
16
  client = InferenceClient(LLM_MODEL_NAME)
17
 
 
18
  device = 0 if torch.cuda.is_available() else "cpu"
19
 
 
20
  pipe = pipeline(
21
  task="automatic-speech-recognition",
22
  model=ASR_MODEL_NAME,
@@ -44,7 +51,9 @@ def generate(instruct_history, temperature=0.1, max_new_tokens=128, top_p=0.95,
44
  return output
45
 
46
  @spaces.GPU(duration=60)
47
- def transcribe(audio, instruct_history=instruct_history):
 
 
48
  sr, y = audio
49
  y = y.astype(np.float32)
50
  y /= np.max(np.abs(y))
@@ -53,27 +62,25 @@ def transcribe(audio, instruct_history=instruct_history):
53
  transcribed_user_audio = pipe({"sampling_rate": sr, "raw": y})["text"]
54
 
55
  # Append user input to history
56
- formatted_history = f"""Human: {transcribed_user_audio}\n\n"""
57
- instruct_history += f"""<s>[INST] {transcribed_user_audio} [/INST] """
58
 
59
  # Generate LLM response
60
  llm_response = generate(instruct_history)
61
 
62
  # Append AI response to history
63
- instruct_history += f""" {llm_response}</s>"""
64
- formatted_history += f"""Friday: {llm_response}\n\n"""
65
 
66
  # Convert AI response to audio
67
  audio_response = gTTS(llm_response)
68
  audio_response.save("response.mp3")
69
 
70
- # Display the full conversation history
71
- full_history = formatted_history
72
-
73
- return "response.mp3", full_history
74
 
75
  with gr.Blocks() as demo:
76
- gr.HTML("<center><h1>Friday: AI Virtual Assistant</h1><center>")
77
 
78
  with gr.Row():
79
  audio_input = gr.Audio(label="Human", sources="microphone")
 
6
  from transformers import pipeline
7
  from huggingface_hub import InferenceClient
8
 
9
+ # Model names
10
  ASR_MODEL_NAME = "openai/whisper-small"
11
  LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
12
 
13
+ # Initial system prompt
14
  system_prompt = """"<s>[INST] You are Friday, a helpful and conversational AI assistant, and you respond with one to two sentences. [/INST] Hello there! I'm Friday, how can I help you?</s>"""
15
 
16
+ # Global variables for history
17
+ instruct_history = system_prompt
18
+ formatted_history = ""
19
 
20
+ # Create inference client for text generation
21
  client = InferenceClient(LLM_MODEL_NAME)
22
 
23
+ # Set device for ASR pipeline
24
  device = 0 if torch.cuda.is_available() else "cpu"
25
 
26
+ # ASR pipeline
27
  pipe = pipeline(
28
  task="automatic-speech-recognition",
29
  model=ASR_MODEL_NAME,
 
51
  return output
52
 
53
  @spaces.GPU(duration=60)
54
+ def transcribe(audio):
55
+ global instruct_history, formatted_history
56
+
57
  sr, y = audio
58
  y = y.astype(np.float32)
59
  y /= np.max(np.abs(y))
 
62
  transcribed_user_audio = pipe({"sampling_rate": sr, "raw": y})["text"]
63
 
64
  # Append user input to history
65
+ formatted_history += f"😃 Human: {transcribed_user_audio}\n\n"
66
+ instruct_history += f"<s>[INST] {transcribed_user_audio} [/INST] "
67
 
68
  # Generate LLM response
69
  llm_response = generate(instruct_history)
70
 
71
  # Append AI response to history
72
+ instruct_history += f" {llm_response}</s>"
73
+ formatted_history += f"🤖 Friday: {llm_response}\n\n"
74
 
75
  # Convert AI response to audio
76
  audio_response = gTTS(llm_response)
77
  audio_response.save("response.mp3")
78
 
79
+ # Return the full conversation history
80
+ return "response.mp3", formatted_history
 
 
81
 
82
  with gr.Blocks() as demo:
83
+ gr.HTML("<center><h1>Friday: AI Virtual Assistant 🤖</h1><center>")
84
 
85
  with gr.Row():
86
  audio_input = gr.Audio(label="Human", sources="microphone")