devkushal75 commited on
Commit
c1533fd
·
verified ·
1 Parent(s): d95581b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -116
app.py CHANGED
@@ -1,116 +1,122 @@
1
- import gradio as gr
2
- from llama_cpp import Llama
3
- import whisper
4
- from gtts import gTTS
5
- import tempfile
6
- import os
7
-
8
- # ----- Initialization -----
9
-
10
- # Initialize the LLAMA model. Update the model_path to point to your model file.
11
- llm = Llama.from_pretrained(
12
- repo_id="TheBloke/Llama-2-7B-GGUF",
13
- filename="llama-2-7b.Q2_K.gguf",
14
- )
15
-
16
-
17
- # Load the Whisper model for speech-to-text transcription.
18
- whisper_model = whisper.load_model("base")
19
-
20
- # ----- Helper Functions -----
21
-
22
- def transcribe_audio(audio_file):
23
- """
24
- Transcribes the provided audio file using Whisper.
25
- """
26
- if audio_file is None:
27
- return ""
28
- result = whisper_model.transcribe(audio_file)
29
- return result["text"]
30
-
31
- def generate_response(prompt, max_tokens=150, temperature=0.7):
32
- """
33
- Uses LLAMA-CPP to generate a response for the given prompt.
34
- """
35
- # Call the LLAMA model. The output is a dict with a "choices" list.
36
- output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=True)
37
- response = output["choices"][0]["text"]
38
- return response.strip()
39
-
40
- def text_to_speech(text):
41
- """
42
- Converts text to speech using gTTS and returns the filepath to the saved audio.
43
- """
44
- tts = gTTS(text=text, lang="en")
45
- tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
46
- tts.save(tmp_file.name)
47
- return tmp_file.name
48
-
49
- def voice_chat(audio, text, history, max_tokens, temperature):
50
- """
51
- Handles a single turn of the conversation:
52
- - If an audio file is provided and no text message, transcribe it.
53
- - Builds a conversation prompt from the chat history.
54
- - Generates a response from LLAMA.
55
- - Converts the response to speech.
56
- Returns updated chat history, the response text, the response audio filepath, and updated state.
57
- """
58
- # Use the transcribed audio if text is empty.
59
- if audio is not None and (text is None or text.strip() == ""):
60
- user_input = transcribe_audio(audio)
61
- else:
62
- user_input = text if text else ""
63
-
64
- # Build the conversation prompt (history is a list of tuples: (user, assistant))
65
- prompt = ""
66
- if history:
67
- for (user_turn, bot_turn) in history:
68
- prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
69
- prompt += f"User: {user_input}\nAssistant: "
70
-
71
- # Generate response using LLAMA-CPP.
72
- response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
73
- # Convert the response to speech audio.
74
- audio_response = text_to_speech(response_text)
75
-
76
- # Append this turn to the conversation history.
77
- new_history = history.copy() if history else []
78
- new_history.append((user_input, response_text))
79
-
80
- # Return four outputs: update the Chatbot display, show the assistant text, play audio, and update state.
81
- return new_history, response_text, audio_response, new_history
82
-
83
- # ----- Gradio Interface -----
84
-
85
- with gr.Blocks() as demo:
86
- gr.Markdown("# Voice Chatbot with LLAMA‑CPP")
87
-
88
- with gr.Row():
89
- with gr.Column(scale=5):
90
- # User inputs: Audio input and/or text input.
91
- audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
92
- text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
93
- send_btn = gr.Button("Send")
94
- max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
95
- temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
96
- with gr.Column(scale=7):
97
- # Display outputs: Chat history, assistant text response, and audio playback.
98
- chat_history = gr.Chatbot(label="Chat History")
99
- response_textbox = gr.Textbox(label="Assistant Response")
100
- audio_output = gr.Audio(label="Response Audio", type="filepath")
101
-
102
- # Gradio State to hold the conversation history.
103
- state = gr.State([])
104
-
105
- def run_voice_chat(audio, text, history, max_tokens, temperature):
106
- return voice_chat(audio, text, history, max_tokens, temperature)
107
-
108
- # On clicking the "Send" button, run the voice_chat function.
109
- send_btn.click(
110
- fn=run_voice_chat,
111
- inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
112
- outputs=[chat_history, response_textbox, audio_output, state]
113
- )
114
-
115
- # Launch the app.
116
- demo.launch()
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+ import whisper
4
+ from gtts import gTTS
5
+ import tempfile
6
+ import os
7
+ from huggingface_hub import hf_hub_download
8
+
9
+
10
# ----- Initialization -----

# Download the GGUF weights from the Hugging Face Hub.
# BUG FIX: repo_id and filename must be string literals — the previous
# revision passed them unquoted, which raises a NameError at import time.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q2_K.gguf",
)

# Initialize the LLAMA model from the downloaded weights.
llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU threads used for generation
    n_batch=512,      # Prompt batch size; should be between 1 and n_ctx
    n_gpu_layers=43,  # Layers offloaded to the GPU; tune to available VRAM
    n_ctx=4096,       # Context window size in tokens
)

# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")
25
+
26
+ # ----- Helper Functions -----
27
+
28
def transcribe_audio(audio_file):
    """Transcribe *audio_file* to text with the Whisper model.

    Args:
        audio_file: Filepath of the recording, or ``None`` if nothing
            was recorded.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    if audio_file is None:
        return ""
    return whisper_model.transcribe(audio_file)["text"]
36
+
37
def generate_response(prompt, max_tokens=150, temperature=0.7):
    """
    Uses LLAMA-CPP to generate a response for the given prompt.

    Args:
        prompt: Full conversation prompt, ending with "Assistant: ".
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.

    Returns:
        The generated completion text with surrounding whitespace stripped.
    """
    # BUG FIX: echo=False so only the completion is returned. With
    # echo=True the output repeats the entire prompt, which then gets
    # spoken back to the user and re-inserted into the chat history.
    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=False)
    return output["choices"][0]["text"].strip()
45
+
46
def text_to_speech(text):
    """
    Converts text to speech using gTTS and returns the filepath to the saved audio.

    Args:
        text: The text to synthesize (English).

    Returns:
        Path to the generated .mp3 file. The caller (Gradio) is
        responsible for the file's lifetime.
    """
    tts = gTTS(text=text, lang="en")
    # BUG FIX: create the temp file and close our descriptor before gTTS
    # writes to it. The previous NamedTemporaryFile handle was never
    # closed, leaking one fd per call and failing on platforms (Windows)
    # that forbid re-opening an already-open temp file.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(path)
    return path
54
+
55
def voice_chat(audio, text, history, max_tokens, temperature):
    """Run a single turn of the conversation.

    Prefers the typed message; when the text box is empty and a recording
    exists, the recording is transcribed instead. Builds a
    "User:/Assistant:" prompt from *history*, queries the model,
    synthesizes the reply as speech, and appends the turn to the history.

    Returns:
        A 4-tuple ``(new_history, response_text, audio_path, new_history)``
        matching the Chatbot, Textbox, Audio, and State outputs.
    """
    # Fall back to transcription only when no text was typed.
    typed = (text or "").strip()
    if audio is not None and not typed:
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""

    # Assemble the prompt from prior turns plus the current user input.
    turns = []
    for user_turn, bot_turn in (history or []):
        turns.append(f"User: {user_turn}\nAssistant: {bot_turn}\n")
    turns.append(f"User: {user_input}\nAssistant: ")
    prompt = "".join(turns)

    # Model completion, then speech synthesis of the reply.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
    audio_response = text_to_speech(response_text)

    # Copy rather than mutate: Gradio state should get a fresh list.
    new_history = list(history) if history else []
    new_history.append((user_input, response_text))

    return new_history, response_text, audio_response, new_history
88
+
89
# ----- Gradio Interface -----

with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA‑CPP")

    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: Audio input and/or text input.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            # Generation controls forwarded to generate_response().
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Display outputs: Chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    # Gradio State to hold the conversation history.
    state = gr.State([])

    def run_voice_chat(audio, text, history, max_tokens, temperature):
        """Thin event-handler wrapper around voice_chat()."""
        return voice_chat(audio, text, history, max_tokens, temperature)

    # On clicking the "Send" button, run the voice_chat function.
    # Outputs map 1:1 to the 4-tuple returned by voice_chat().
    send_btn.click(
        fn=run_voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state]
    )

# Launch the app.
demo.launch()