Upload 2 files
- app.py +70 -0
- text_speech_utils.py +52 -0
app.py
ADDED
@@ -0,0 +1,70 @@
import streamlit as st
import pandas as pd
import openai
from text_speech_utils import *  # Audio helper module added in this commit

# Define filenames for audio and conversation output
input_audio_filename = 'input.wav'
output_audio_filename = 'chatgpt_response.wav'
output_conversation_filename = 'ChatGPT_conversation.txt'

# Initialize app
if 'messages' not in st.session_state:
    st.session_state['messages'] = [{"role": "system", "content": "You are a helpful assistant."}]

# Allow user to input OpenAI API key via Streamlit text input
openai.api_key = st.text_input("Enter your OpenAI API Key", type="password")

# Display a warning if the API key is not provided
if not openai.api_key:
    st.warning("Please enter your OpenAI API key to proceed.")

# UI components
st.title("My awesome personal assistant")
sec = st.slider("Select number of seconds of recording", min_value=2, max_value=8, value=5)

# Record audio + transcribe with Whisper + get GPT response
if st.button('Record audio'):
    if openai.api_key:  # Proceed only if an API key is provided
        st.write("Recording...")
        record_audio(input_audio_filename, sec)

        transcription = transcribe_audio(input_audio_filename)
        st.write(f"Me: {transcription['text']}")
        st.session_state['messages'].append({"role": "user", "content": transcription['text']})

        bot = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=st.session_state['messages'])
        response = bot.choices[0].message.content
        st.write(f"GPT: {response}")

        save_text_as_audio(response, output_audio_filename)
        play_audio(output_audio_filename)

        st.session_state['messages'].append({"role": "assistant", "content": response})
    else:
        st.error("API key is required to interact with GPT.")

# # Download conversation button (CSV version, superseded by the plain-text version below)
# st.download_button(label="Download conversation",
#                    data=pd.DataFrame(st.session_state['messages']).to_csv(index=False).encode('utf-8'),
#                    file_name=output_conversation_filename)

# Function to render the conversation as plain text
def generate_conversation_text(messages):
    conversation_text = ""
    for message in messages:
        if message["role"] == "user":
            conversation_text += f"Me: {message['content']}\n"
        elif message["role"] == "assistant":
            conversation_text += f"GPT: {message['content']}\n"
        elif message["role"] == "system":
            conversation_text += f"System: {message['content']}\n"
    return conversation_text

# Download conversation button
st.download_button(
    label="Download conversation",
    data=generate_conversation_text(st.session_state['messages']).encode('utf-8'),
    file_name=output_conversation_filename,
    mime="text/plain"
)
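The app would be launched with streamlit run app.py (assuming streamlit, pandas, and the legacy openai<1.0 SDK are installed). As a minimal sketch, the download helper can also be checked standalone; the message list below is hypothetical and mirrors the session-state format used above:

# Hypothetical standalone check of generate_conversation_text
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
]
print(generate_conversation_text(messages))
# Expected output:
# System: You are a helpful assistant.
# Me: What is the capital of France?
# GPT: The capital of France is Paris.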
text_speech_utils.py
ADDED
@@ -0,0 +1,52 @@
import openai
import sounddevice as sd
import audiofile as af
from scipy.io.wavfile import write
from gtts import gTTS

import multiprocessing
import pyttsx3
import keyboard

def say(text):
    # Speak in a separate process so a press of 'enter' can interrupt playback
    p = multiprocessing.Process(target=pyttsx3.speak, args=(text,))
    p.start()
    while p.is_alive():
        if keyboard.is_pressed('enter'):
            p.terminate()
    p.join()

def record_audio(filename, sec, sr=44100):
    # Record sec seconds of stereo audio, block until done, then write a WAV file
    audio = sd.rec(int(sec * sr), samplerate=sr, channels=2, blocking=False)
    sd.wait()
    write(filename, sr, audio)

def record_audio_manual(filename, sr=44100):
    # Record up to 10 seconds; pressing enter stops the recording early
    input(" ** Press enter to start recording **")
    audio = sd.rec(int(10 * sr), samplerate=sr, channels=2)
    input(" ** Press enter to stop recording **")
    sd.stop()
    write(filename, sr, audio)

def play_audio(filename):
    signal, sr = af.read(filename)
    # audiofile returns (channels, samples) for multi-channel audio;
    # sounddevice expects (samples, channels), so transpose when needed
    sd.play(signal.T if signal.ndim > 1 else signal, sr)
    sd.wait()  # block until playback finishes so it isn't cut off or overwritten

def transcribe_audio(filename):
    with open(filename, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript

def translate_audio(filename):
    with open(filename, "rb") as audio_file:
        translation = openai.Audio.translate("whisper-1", audio_file)
    return translation

def save_text_as_audio(text, audio_filename):
    # Note: gTTS writes MP3 data regardless of the file extension
    myobj = gTTS(text=text, lang='en', slow=False)
    myobj.save(audio_filename)
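For reference, a minimal smoke test of these utilities outside Streamlit, as a sketch: it assumes a working microphone and speakers, the legacy openai<1.0 SDK with a valid API key, and an audio backend that can decode MP3 (audiofile typically falls back to ffmpeg for non-WAV formats); "test.wav" and "reply.mp3" are hypothetical filenames.

# Hypothetical smoke test for text_speech_utils (not part of the upload)
import openai
from text_speech_utils import record_audio, transcribe_audio, save_text_as_audio, play_audio

openai.api_key = "sk-..."                   # set your own key here
record_audio("test.wav", sec=3)             # 3-second stereo recording from the default mic
transcript = transcribe_audio("test.wav")   # Whisper transcription via the legacy openai API
print(transcript["text"])
save_text_as_audio("Hello from the assistant", "reply.mp3")
play_audio("reply.mp3")                     # blocks until playback finishes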