shukdevdatta123 commited on
Commit
5232378
·
verified ·
1 Parent(s): 94ba8ed

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +70 -0
  2. text_speech_utils.py +52 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import openai
4
+ from text_speech_utils import * # Assuming this module exists for your audio functionality
5
+
6
# --- Output artifact paths -------------------------------------------------
# Recorded microphone input, synthesized reply audio, and the downloadable
# conversation transcript, respectively.
input_audio_filename = 'input.wav'
output_audio_filename = 'chatgpt_response.wav'
output_conversation_filename = 'ChatGPT_conversation.txt'

# Initialize the chat history once per browser session. Streamlit reruns the
# whole script on every interaction, so session_state is the only place the
# conversation survives between reruns.
if 'messages' not in st.session_state:
    st.session_state['messages'] = [{"role": "system", "content": "You are a helpful assistant."}]

# The user supplies their own OpenAI key; `type="password"` masks the input.
# NOTE(review): this assigns the module-global openai.api_key on every rerun.
openai.api_key = st.text_input("Enter your OpenAI API Key", type="password")

# An empty text_input returns "" (falsy), so this warns until a key is typed.
if not openai.api_key:
    st.warning("Please enter your OpenAI API key to proceed.")

# --- UI components ---------------------------------------------------------
st.title("My awesome personal assistant")
sec = st.slider("Select number of seconds of recording", min_value=2, max_value=8, value=5)

# Record audio -> transcribe with Whisper -> get chat completion -> speak it.
if st.button('Record audio'):
    if openai.api_key:  # proceed only if an API key was provided
        st.write("Recording...")
        # Blocks for `sec` seconds and writes input.wav (see text_speech_utils).
        record_audio(input_audio_filename, sec)

        # Whisper transcription; the response exposes the text under 'text'.
        transcription = transcribe_audio(input_audio_filename)
        st.write(f"Me: {transcription['text']}")
        st.session_state['messages'].append({"role": "user", "content": transcription['text']})

        # Send the full running history so the model keeps conversational context.
        bot = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=st.session_state['messages'])
        response = bot.choices[0].message.content
        st.write(f"GPT: {response}")

        # Synthesize the reply with gTTS and play it back immediately.
        save_text_as_audio(response, output_audio_filename)
        play_audio(output_audio_filename)

        st.session_state['messages'].append({"role": "assistant", "content": response})
    else:
        st.error("API key is required to interact with GPT.")
46
+
47
+ # # Download conversation button
48
+ # st.download_button(label="Download conversation",
49
+ # data=pd.DataFrame(st.session_state['messages']).to_csv(index=False).encode('utf-8'),
50
+ # file_name=output_conversation_filename)
51
+
52
# Function to generate the conversation transcript as plain text
def generate_conversation_text(messages):
    """Render a chat history as a plain-text transcript, one line per message.

    Roles map to display prefixes: ``user`` -> ``Me:``, ``assistant`` ->
    ``GPT:``, ``system`` -> ``System:``. Messages with any other role are
    silently skipped (same as the original if/elif chain).

    Args:
        messages: list of dicts, each with "role" and "content" keys.

    Returns:
        str: the transcript; every rendered line ends with a newline.
    """
    prefixes = {"user": "Me", "assistant": "GPT", "system": "System"}
    # Build all lines first and join once — avoids quadratic string
    # concatenation from repeated `+=` in a loop.
    lines = [
        f"{prefixes[message['role']]}: {message['content']}\n"
        for message in messages
        if message["role"] in prefixes
    ]
    return "".join(lines)
63
+
64
# Download conversation button. Streamlit reruns the script on every
# interaction, so the transcript is regenerated each time and always
# reflects the current session history.
st.download_button(
    label="Download conversation",
    # download_button wants bytes; encode the rendered transcript as UTF-8.
    data=generate_conversation_text(st.session_state['messages']).encode('utf-8'),
    file_name=output_conversation_filename,
    mime="text/plain"
)
text_speech_utils.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import multiprocessing
import time

# Third-party
import audiofile as af
import keyboard
import openai
import pyttsx3
import sounddevice as sd
from gtts import gTTS
from scipy.io.wavfile import write
10
+
11
def say(text):
    """Speak *text* aloud via pyttsx3 in a child process, cancellable with Enter.

    The TTS runs in a separate process so that playback can be interrupted:
    pressing Enter terminates the speech mid-utterance.

    Args:
        text: the string to speak.
    """
    p = multiprocessing.Process(target=pyttsx3.speak, args=(text,))
    p.start()
    while p.is_alive():
        if keyboard.is_pressed('enter'):
            p.terminate()
            break  # stop polling once we've killed the speech process
        # Short sleep instead of a tight `continue` loop — the original
        # busy-waited and pegged a CPU core for the whole utterance.
        time.sleep(0.05)
    p.join()
20
+
21
+
22
def record_audio(filename, sec, sr = 44100):
    """Record `sec` seconds of stereo audio at `sr` Hz and save it to `filename` as WAV."""
    frame_count = int(sec * sr)
    # Start a non-blocking capture, then wait for it to complete before writing.
    buffer = sd.rec(frame_count, samplerate=sr, channels=2, blocking=False)
    sd.wait()
    write(filename, sr, buffer)
26
+
27
def record_audio_manual(filename, sr = 44100, max_sec = 10):
    """Record stereo audio with manual start/stop (Enter key), saved to `filename`.

    Capture starts after the first Enter press and is capped at `max_sec`
    seconds; pressing Enter again stops it early. The full pre-allocated
    buffer is written, so stopping early leaves trailing silence (zeros)
    in the output file — NOTE(review): unchanged from the original.

    Args:
        filename: output WAV path.
        sr: sample rate in Hz.
        max_sec: maximum recording length in seconds (previously a
            hard-coded 10; kept as the default for backward compatibility).
    """
    input(" ** Press enter to start recording **")
    audio = sd.rec(int(max_sec * sr), samplerate=sr, channels=2)
    input(" ** Press enter to stop recording **")
    sd.stop()
    write(filename, sr, audio)
33
+
34
def play_audio(filename):
    """Play an audio file through the default output device (returns without waiting)."""
    samples, rate = af.read(filename)
    sd.play(samples, rate)
37
+
38
def transcribe_audio(filename):
    """Transcribe an audio file with OpenAI Whisper.

    Args:
        filename: path to the audio file (e.g. a WAV recording).

    Returns:
        The Whisper transcription response (exposes the text under 'text').
    """
    # `with` guarantees the file handle is closed even if the API call
    # raises — the original open/close pair leaked the handle on error.
    with open(filename, "rb") as audio_file:
        return openai.Audio.transcribe("whisper-1", audio_file)
43
+
44
def translate_audio(filename):
    """Translate an audio file's speech to English with OpenAI Whisper.

    Args:
        filename: path to the audio file (e.g. a WAV recording).

    Returns:
        The Whisper translation response.
    """
    # `with` guarantees the file handle is closed even if the API call
    # raises — the original open/close pair leaked the handle on error.
    with open(filename, "rb") as audio_file:
        return openai.Audio.translate("whisper-1", audio_file)
49
+
50
def save_text_as_audio(text, audio_filename):
    """Synthesize `text` as English speech with gTTS and save it to `audio_filename`."""
    speech = gTTS(text=text, lang='en', slow=False)
    speech.save(audio_filename)