Spaces:
Sleeping
Sleeping
v0.1
Browse files- .gitignore +162 -0
- QuickAgent.py +219 -0
- app.py +16 -0
- building_blocks/llm.py +27 -0
- building_blocks/speech_to_text_streaming.py +94 -0
- building_blocks/text_to_speech.py +98 -0
- requirements.txt +89 -0
- system_prompt.txt +4 -0
.gitignore
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
|
162 |
+
scratch
|
QuickAgent.py
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import shutil
|
4 |
+
import subprocess
|
5 |
+
import requests
|
6 |
+
import time
|
7 |
+
import os
|
8 |
+
|
9 |
+
from langchain_core.prompts import ChatPromptTemplate
|
10 |
+
from langchain_groq import ChatGroq
|
11 |
+
from langchain_openai import ChatOpenAI
|
12 |
+
from langchain.memory import ConversationBufferMemory
|
13 |
+
from langchain.prompts import (
|
14 |
+
ChatPromptTemplate,
|
15 |
+
MessagesPlaceholder,
|
16 |
+
SystemMessagePromptTemplate,
|
17 |
+
HumanMessagePromptTemplate,
|
18 |
+
)
|
19 |
+
from langchain.chains import LLMChain
|
20 |
+
|
21 |
+
from deepgram import (
|
22 |
+
DeepgramClient,
|
23 |
+
DeepgramClientOptions,
|
24 |
+
LiveTranscriptionEvents,
|
25 |
+
LiveOptions,
|
26 |
+
Microphone,
|
27 |
+
)
|
28 |
+
|
29 |
+
load_dotenv()
|
30 |
+
|
31 |
+
class LanguageModelProcessor:
    """Wraps a Groq-hosted chat model with rolling conversation memory.

    Every call to process() sees the prior turns via ConversationBufferMemory,
    and the model's round-trip latency is logged to stdout.
    """

    def __init__(self, groq_api_key):
        # Deterministic (temperature=0) Mixtral model served by Groq.
        self.llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768", groq_api_key=groq_api_key)

        # Rolling chat history, exposed to the prompt as "chat_history".
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

        # The system prompt lives in a text file next to the script.
        with open('system_prompt.txt', 'r') as prompt_file:
            base_instructions = prompt_file.read().strip()

        self.prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(base_instructions),
            MessagesPlaceholder(variable_name="chat_history"),
            HumanMessagePromptTemplate.from_template("{text}"),
        ])

        self.conversation = LLMChain(
            llm=self.llm,
            prompt=self.prompt,
            memory=self.memory,
        )

    def process(self, text):
        """Send *text* to the LLM, record both turns in memory, return the reply."""
        self.memory.chat_memory.add_user_message(text)  # record the user turn

        started = time.time()
        result = self.conversation.invoke({"text": text})
        latency_ms = int((time.time() - started) * 1000)

        reply = result['text']
        self.memory.chat_memory.add_ai_message(reply)  # record the AI turn

        print(f"LLM ({latency_ms}ms): {reply}")
        return reply
|
67 |
+
|
68 |
+
class TextToSpeech:
    """Streams Deepgram Aura TTS audio for a text string straight into ffplay."""

    # Deepgram voice model used by speak(); change as needed.
    MODEL_NAME = "aura-helios-en"  # Example model name, change as needed

    def __init__(self, deepgram_api_key):
        # Deepgram API key used to authorize the /v1/speak request.
        self.DG_API_KEY = deepgram_api_key

    @staticmethod
    def is_installed(lib_name: str) -> bool:
        """Return True if *lib_name* resolves to an executable on PATH."""
        return shutil.which(lib_name) is not None

    def speak(self, text):
        """Synthesize *text* and play it as the audio streams in.

        Raises:
            ValueError: If ffplay is not available on PATH.
            requests.HTTPError: If the Deepgram API returns an error status.
        """
        if not self.is_installed("ffplay"):
            raise ValueError("ffplay not found, necessary to stream audio.")

        DEEPGRAM_URL = f"https://api.deepgram.com/v1/speak?model={self.MODEL_NAME}&performance=some&encoding=linear16&sample_rate=24000"
        headers = {
            "Authorization": f"Token {self.DG_API_KEY}",
            "Content-Type": "application/json"
        }
        payload = {
            "text": text
        }

        # ffplay reads raw linear16 audio from stdin; -nodisp/-autoexit keep it headless.
        player_process = subprocess.Popen(
            ["ffplay", "-autoexit", "-", "-nodisp"],
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        start_time = time.time()  # Record the time before sending the request
        first_byte_time = None    # Set when the first audio chunk is received

        try:
            with requests.post(DEEPGRAM_URL, stream=True, headers=headers, json=payload) as r:
                # BUG FIX: fail fast on an HTTP error instead of piping the JSON
                # error body into ffplay as if it were audio.
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        if first_byte_time is None:  # first chunk => report TTFB
                            first_byte_time = time.time()
                            ttfb = int((first_byte_time - start_time) * 1000)
                            print(f"TTS Time to First Byte (TTFB): {ttfb}ms\n")
                        player_process.stdin.write(chunk)
                        player_process.stdin.flush()
        finally:
            # BUG FIX: always close the pipe and reap the child, even when the
            # request raises, so no zombie ffplay process is left behind.
            if player_process.stdin:
                player_process.stdin.close()
            player_process.wait()
|
117 |
+
|
118 |
+
class TranscriptCollector:
    """Accumulates interim transcript fragments until a sentence is final."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard everything collected so far and start a fresh sentence."""
        self.transcript_parts = []

    def add_part(self, part):
        """Buffer one more fragment of the in-progress sentence."""
        self.transcript_parts.append(part)

    def get_full_transcript(self):
        """Join the buffered fragments into one space-separated string."""
        return ' '.join(self.transcript_parts)
|
130 |
+
|
131 |
+
# Module-level collector shared by get_transcript's on_message callback.
transcript_collector = TranscriptCollector()
|
132 |
+
|
133 |
+
async def get_transcript(callback):
    """Open a Deepgram live-transcription socket fed by the default microphone.

    Blocks until one complete, non-empty sentence has been transcribed, passes
    it to *callback*, then tears the connection down and returns.

    Args:
        callback: Callable invoked once with the finished sentence (str).
    """
    transcription_complete = asyncio.Event()  # Event to signal transcription completion

    try:
        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
        config = DeepgramClientOptions(options={"keepalive": "true"})
        # NOTE(review): empty key string — presumably the SDK falls back to the
        # DEEPGRAM_API_KEY environment variable; confirm for the pinned SDK version.
        deepgram: DeepgramClient = DeepgramClient("", config)

        dg_connection = deepgram.listen.asynclive.v("1")
        print ("Listening...")

        async def on_message(self, result, **kwargs):
            # One transcription event; interim fragments arrive until speech_final.
            sentence = result.channel.alternatives[0].transcript

            if not result.speech_final:
                transcript_collector.add_part(sentence)
            else:
                # This is the final part of the current sentence
                transcript_collector.add_part(sentence)
                full_sentence = transcript_collector.get_full_transcript()
                # Check if the full_sentence is not empty before printing
                if len(full_sentence.strip()) > 0:
                    full_sentence = full_sentence.strip()
                    print(f"Human: {full_sentence}")
                    callback(full_sentence)  # Call the callback with the full_sentence
                    transcript_collector.reset()
                    transcription_complete.set()  # Signal to stop transcription and exit

        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)

        options = LiveOptions(
            model="nova-2",
            punctuate=True,
            language="en-US",
            encoding="linear16",  # raw 16-bit PCM from the Microphone helper
            channels=1,
            sample_rate=16000,
            endpointing=300,  # ms of silence that ends an utterance
            smart_format=True,
        )

        await dg_connection.start(options)

        # Open a microphone stream on the default input device
        microphone = Microphone(dg_connection.send)
        microphone.start()

        await transcription_complete.wait()  # Wait for the transcription to complete instead of looping indefinitely

        # Wait for the microphone to close
        microphone.finish()

        # Indicate that we've finished
        await dg_connection.finish()

    except Exception as e:
        print(f"Could not open socket: {e}")
        return
|
191 |
+
|
192 |
+
class ConversationManager:
    """Drives the listen -> LLM -> speak loop until the user says "goodbye".

    Args:
        groq_api_key: Groq key for the LLM; defaults to the GROQ_API_KEY env var.
        deepgram_api_key: Deepgram key for TTS; defaults to DEEPGRAM_API_KEY.
    """

    def __init__(self, groq_api_key=None, deepgram_api_key=None):
        self.transcription_response = ""
        # BUG FIX: LanguageModelProcessor() and TextToSpeech() were called with
        # no arguments although both require an API key -> TypeError at runtime.
        # Keys now come from the (optional, backward-compatible) constructor
        # parameters, falling back to the environment.
        self.deepgram_api_key = deepgram_api_key or os.getenv("DEEPGRAM_API_KEY")
        self.llm = LanguageModelProcessor(groq_api_key=groq_api_key or os.getenv("GROQ_API_KEY"))

    async def main(self):
        """Run the conversation loop: transcribe, respond, speak, repeat."""
        def handle_full_sentence(full_sentence):
            # Stash the finished sentence so the loop below can act on it.
            self.transcription_response = full_sentence

        # Loop indefinitely until "goodbye" is detected
        while True:
            await get_transcript(handle_full_sentence)

            # Check for "goodbye" to exit the loop
            if "goodbye" in self.transcription_response.lower():
                break

            llm_response = self.llm.process(self.transcription_response)

            tts = TextToSpeech(deepgram_api_key=self.deepgram_api_key)
            tts.speak(llm_response)

            # Reset transcription_response for the next loop iteration
            self.transcription_response = ""
|
216 |
+
|
217 |
+
if __name__ == "__main__":
    # Entry point: run the voice-conversation loop until "goodbye" is heard.
    asyncio.run(ConversationManager().main())
|
app.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
from QuickAgent import LanguageModelProcessor, TextToSpeech

# Minimal Streamlit front-end: collects the two API keys and wires up the
# QuickAgent building blocks.
st.title('AI Language Teacher')

# SECURITY FIX: mask the key fields so secrets are not rendered on screen.
groq_api_key = st.text_input('Enter your GROQ_API_KEY', type='password')
deepgram_api_key = st.text_input('Enter your DEEPGRAM_API_KEY', type='password')

if st.button('Start'):
    if groq_api_key and deepgram_api_key:
        processor = LanguageModelProcessor(groq_api_key=groq_api_key)
        tts = TextToSpeech(deepgram_api_key=deepgram_api_key)
        # The logic to process or start the conversation would go here.
        st.write("Processing started")
    else:
        st.write("Please enter both API keys")
|
building_blocks/llm.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.prompts import ChatPromptTemplate
|
2 |
+
from langchain_groq import ChatGroq
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import os
|
5 |
+
|
6 |
+
load_dotenv()
|
7 |
+
|
8 |
+
def batch():
    """Run a single one-shot (non-streaming) chat completion against Groq."""
    model = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768", groq_api_key=os.getenv("GROQ_API_KEY"))

    # System + human template; {text} is filled in at invoke time.
    messages = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant."),
        ("human", "{text}"),
    ])

    pipeline = messages | model
    print(pipeline.invoke({"text": "Explain the importance of low latency LLMs."}))
|
17 |
+
|
18 |
+
# Streaming
def streaming():
    """Stream a long completion token-by-token to stdout."""
    model = ChatGroq(temperature=0, model_name="llama2-70b-4096", groq_api_key=os.getenv("GROQ_API_KEY"))
    pipeline = ChatPromptTemplate.from_messages([("human", "Write a very long poem about {topic}")]) | model
    # Print each chunk as it arrives; flush so output appears immediately.
    for piece in pipeline.stream({"topic": "The Moon"}):
        print(piece.content, end="", flush=True)
|
25 |
+
|
26 |
+
if __name__ == "__main__":
    # BUG FIX: guarded so importing this module no longer fires a live LLM
    # request as a side effect.
    # batch()
    streaming()
|
building_blocks/speech_to_text_streaming.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
from deepgram import (
|
5 |
+
DeepgramClient,
|
6 |
+
DeepgramClientOptions,
|
7 |
+
LiveTranscriptionEvents,
|
8 |
+
LiveOptions,
|
9 |
+
Microphone,
|
10 |
+
)
|
11 |
+
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
class TranscriptCollector:
    """Buffers partial transcript pieces until a full sentence is assembled."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Start over with an empty buffer."""
        self.transcript_parts = []

    def add_part(self, part):
        """Append one transcript fragment to the buffer."""
        self.transcript_parts.append(part)

    def get_full_transcript(self):
        """Return the buffered fragments joined with single spaces."""
        return ' '.join(self.transcript_parts)
|
26 |
+
|
27 |
+
# Shared collector used by the on_message callback inside get_transcript.
transcript_collector = TranscriptCollector()
|
28 |
+
|
29 |
+
async def get_transcript():
    """Stream the default microphone to Deepgram and print live transcripts.

    Runs until the microphone stream stops being active, then shuts the
    websocket down cleanly.
    """
    try:
        config = DeepgramClientOptions(options={"keepalive": "true"})
        # NOTE(review): empty key string — presumably the SDK falls back to the
        # DEEPGRAM_API_KEY environment variable; confirm for the pinned SDK version.
        deepgram: DeepgramClient = DeepgramClient("", config)

        dg_connection = deepgram.listen.asynclive.v("1")

        async def on_message(self, result, **kwargs):
            # print (result)
            sentence = result.channel.alternatives[0].transcript

            print (result)

            if not result.speech_final:
                transcript_collector.add_part(sentence)
            else:
                # This is the final part of the current sentence
                transcript_collector.add_part(sentence)
                full_sentence = transcript_collector.get_full_transcript()
                print(f"speaker: {full_sentence}")
                # Reset the collector for the next sentence
                transcript_collector.reset()

        async def on_error(self, error, **kwargs):
            print(f"\n\n{error}\n\n")

        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
        dg_connection.on(LiveTranscriptionEvents.Error, on_error)

        options = LiveOptions(
            model="nova-2",
            punctuate=True,
            language="en-US",
            encoding="linear16",
            channels=1,
            sample_rate=16000,
            endpointing=True
        )

        await dg_connection.start(options)

        # Open a microphone stream on the default input device
        microphone = Microphone(dg_connection.send)

        # start microphone
        microphone.start()

        # Poll until the microphone stream closes.
        while True:
            if not microphone.is_active():
                break
            await asyncio.sleep(1)

        # Wait for the microphone to close
        microphone.finish()

        # BUG FIX: finish() on the async live client is a coroutine; without
        # `await` it never actually ran and the socket was left open (the
        # QuickAgent.py copy of this function awaits it correctly).
        await dg_connection.finish()

        print("Finished")

    except Exception as e:
        print(f"Could not open socket: {e}")
        return
|
92 |
+
|
93 |
+
# Run the live-transcription demo when executed as a script.
if __name__ == "__main__":
    asyncio.run(get_transcript())
|
building_blocks/text_to_speech.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import subprocess
|
5 |
+
import shutil
|
6 |
+
import time
|
7 |
+
from deepgram import Deepgram
|
8 |
+
|
9 |
+
# brew install portaudio
|
10 |
+
|
11 |
+
# Load environment variables
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
# Set your Deepgram API Key and desired voice model
|
15 |
+
DG_API_KEY = os.getenv("DEEPGRAM_API_KEY")
|
16 |
+
MODEL_NAME = "alpha-stella-en-v2" # Example model name, change as needed
|
17 |
+
|
18 |
+
def is_installed(lib_name: str) -> bool:
    """Return True if *lib_name* resolves to an executable on PATH."""
    return shutil.which(lib_name) is not None
|
21 |
+
|
22 |
+
def play_stream(audio_stream, use_ffmpeg=True):
    """Pipe an iterable of raw audio chunks into ffplay for playback.

    Args:
        audio_stream: Iterable yielding bytes chunks of audio data.
        use_ffmpeg: Unused; kept for interface compatibility.

    Raises:
        ValueError: If ffplay is not available on PATH.
    """
    player = "ffplay"
    if not is_installed(player):
        raise ValueError(f"{player} not found, necessary to stream audio.")

    # Headless ffplay reading raw audio from stdin.
    proc = subprocess.Popen(
        ["ffplay", "-autoexit", "-", "-nodisp"],
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    for block in audio_stream:
        if block:
            proc.stdin.write(block)  # type: ignore
            proc.stdin.flush()  # type: ignore

    # Signal end-of-stream, then wait for playback to drain.
    if proc.stdin:
        proc.stdin.close()
    proc.wait()
|
43 |
+
|
44 |
+
def send_tts_request(text):
    """Request Deepgram TTS for *text* and play the audio as it streams.

    Starts ffplay before sending the request so playback can begin on the
    first received chunk, then streams the HTTP body straight into its stdin.

    Raises:
        ValueError: If ffplay is not available on PATH.
        requests.HTTPError: If the Deepgram API returns an error status.
    """
    DEEPGRAM_URL = f"https://api.beta.deepgram.com/v1/speak?model={MODEL_NAME}&performance=some&encoding=linear16&sample_rate=24000"

    headers = {
        "Authorization": f"Token {DG_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "text": text,
        "voice": MODEL_NAME
    }

    # Initialize the player process here, before receiving the stream
    player = "ffplay"
    if not is_installed(player):
        raise ValueError(f"{player} not found, necessary to stream audio.")

    player_process = subprocess.Popen(
        ["ffplay", "-autoexit", "-", "-nodisp"],
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # BUG FIX: start_time and first_byte_time were initialized twice (before
    # and after spawning the player); the single init belongs right before the
    # request so TTFB is measured from send time.
    start_time = time.time()  # Record the time before sending the request
    first_byte_time = None    # Set when the first audio byte is received

    with requests.post(DEEPGRAM_URL, stream=True, headers=headers, json=payload) as r:
        # BUG FIX: surface HTTP errors instead of piping a JSON error body
        # into ffplay as if it were audio.
        r.raise_for_status()

        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                if first_byte_time is None:  # first chunk => report TTFB
                    first_byte_time = time.time()
                    ttfb = int((first_byte_time - start_time) * 1000)
                    print(f"Time to First Byte (TTFB): {ttfb}ms")
                # Write each chunk to the player's stdin immediately
                player_process.stdin.write(chunk)  # type: ignore
                player_process.stdin.flush()  # type: ignore

    # Close the player's stdin and wait for the process to finish
    if player_process.stdin:
        player_process.stdin.close()
    player_process.wait()
|
94 |
+
|
95 |
+
# Example usage with saving to file
text = """
The returns for performance are superlinear."""

if __name__ == "__main__":
    # BUG FIX: guarded so importing this module no longer fires a network
    # TTS request and launches ffplay as a side effect.
    send_tts_request(text)
|
requirements.txt
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.9.3
|
2 |
+
aiosignal==1.3.1
|
3 |
+
annotated-types==0.6.0
|
4 |
+
anyio==4.3.0
|
5 |
+
appnope==0.1.4
|
6 |
+
asttokens==2.4.1
|
7 |
+
asyncio==3.4.3
|
8 |
+
attrs==23.2.0
|
9 |
+
certifi==2024.2.2
|
10 |
+
cffi==1.16.0
|
11 |
+
charset-normalizer==3.3.2
|
12 |
+
comm==0.2.1
|
13 |
+
dataclasses-json==0.6.4
|
14 |
+
debugpy==1.8.1
|
15 |
+
decorator==5.1.1
|
16 |
+
deepgram-sdk==3.1.4
|
17 |
+
distro==1.9.0
|
18 |
+
executing==2.0.1
|
19 |
+
frozenlist==1.4.1
|
20 |
+
gevent==24.2.1
|
21 |
+
greenlet==3.0.3
|
22 |
+
groq==0.4.1
|
23 |
+
h11==0.14.0
|
24 |
+
httpcore==1.0.4
|
25 |
+
httpx==0.27.0
|
26 |
+
idna==3.6
|
27 |
+
ipykernel==6.29.2
|
28 |
+
ipython==8.22.1
|
29 |
+
jedi==0.19.1
|
30 |
+
jsonpatch==1.33
|
31 |
+
jsonpointer==2.4
|
32 |
+
jupyter_client==8.6.0
|
33 |
+
jupyter_core==5.7.1
|
34 |
+
langchain==0.1.9
|
35 |
+
langchain-community==0.0.24
|
36 |
+
langchain-core==0.1.26
|
37 |
+
langchain-groq==0.0.1
|
38 |
+
langchain-openai==0.0.7
|
39 |
+
langsmith==0.1.6
|
40 |
+
marshmallow==3.20.2
|
41 |
+
matplotlib-inline==0.1.6
|
42 |
+
multidict==6.0.5
|
43 |
+
mypy-extensions==1.0.0
|
44 |
+
nest-asyncio==1.6.0
|
45 |
+
numpy==1.26.4
|
46 |
+
openai==1.12.0
|
47 |
+
orjson==3.9.15
|
48 |
+
packaging==23.2
|
49 |
+
parso==0.8.3
|
50 |
+
pexpect==4.9.0
|
51 |
+
platformdirs==4.2.0
|
52 |
+
prompt-toolkit==3.0.43
|
53 |
+
psutil==5.9.8
|
54 |
+
ptyprocess==0.7.0
|
55 |
+
pure-eval==0.2.2
|
56 |
+
PyAudio==0.2.14
|
57 |
+
pycparser==2.21
|
58 |
+
pydantic==2.6.2
|
59 |
+
pydantic_core==2.16.3
|
60 |
+
pygame==2.5.2
|
61 |
+
Pygments==2.17.2
|
62 |
+
python-dateutil==2.8.2
|
63 |
+
python-dotenv==1.0.1
|
64 |
+
PyYAML==6.0.1
|
65 |
+
pyzmq==25.1.2
|
66 |
+
regex==2023.12.25
|
67 |
+
requests==2.31.0
|
68 |
+
six==1.16.0
|
69 |
+
sniffio==1.3.0
|
70 |
+
sounddevice==0.4.6
|
71 |
+
SQLAlchemy==2.0.27
|
72 |
+
stack-data==0.6.3
|
73 |
+
tenacity==8.2.3
|
74 |
+
tiktoken==0.6.0
|
75 |
+
tornado==6.4
|
76 |
+
tqdm==4.66.2
|
77 |
+
traitlets==5.14.1
|
78 |
+
typing-inspect==0.9.0
|
79 |
+
typing_extensions==4.9.0
|
80 |
+
urllib3==2.2.1
|
81 |
+
verboselogs==1.7
|
82 |
+
wcwidth==0.2.13
|
83 |
+
websocket==0.2.1
|
84 |
+
websocket-client==1.7.0
|
85 |
+
websockets==12.0
|
86 |
+
yarl==1.9.4
|
87 |
+
zope.event==5.0
|
88 |
+
zope.interface==6.2
|
89 |
+
streamlit
|
system_prompt.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are a conversational teacher language assistant named Eliza.
|
2 |
+
Use short, conversational responses as if you're having a live conversation.
|
3 |
+
Your response should be under 20 words.
|
4 |
+
Do not respond with any code, only conversation.
|