Spaces:
Sleeping
Sleeping
v0.1
Browse files- .gitignore +162 -0
- QuickAgent.py +219 -0
- app.py +16 -0
- building_blocks/llm.py +27 -0
- building_blocks/speech_to_text_streaming.py +94 -0
- building_blocks/text_to_speech.py +98 -0
- requirements.txt +89 -0
- system_prompt.txt +4 -0
.gitignore
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
|
162 |
+
scratch
|
QuickAgent.py
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import shutil
|
4 |
+
import subprocess
|
5 |
+
import requests
|
6 |
+
import time
|
7 |
+
import os
|
8 |
+
|
9 |
+
from langchain_core.prompts import ChatPromptTemplate
|
10 |
+
from langchain_groq import ChatGroq
|
11 |
+
from langchain_openai import ChatOpenAI
|
12 |
+
from langchain.memory import ConversationBufferMemory
|
13 |
+
from langchain.prompts import (
|
14 |
+
ChatPromptTemplate,
|
15 |
+
MessagesPlaceholder,
|
16 |
+
SystemMessagePromptTemplate,
|
17 |
+
HumanMessagePromptTemplate,
|
18 |
+
)
|
19 |
+
from langchain.chains import LLMChain
|
20 |
+
|
21 |
+
from deepgram import (
|
22 |
+
DeepgramClient,
|
23 |
+
DeepgramClientOptions,
|
24 |
+
LiveTranscriptionEvents,
|
25 |
+
LiveOptions,
|
26 |
+
Microphone,
|
27 |
+
)
|
28 |
+
|
29 |
+
load_dotenv()
|
30 |
+
|
31 |
+
class LanguageModelProcessor:
    """Wraps a Groq-hosted chat model with rolling conversation memory.

    Every call to process() sees the prior turns via ConversationBufferMemory,
    and the model's round-trip latency is logged to stdout.
    """

    def __init__(self, groq_api_key):
        # Deterministic (temperature=0) Mixtral model served by Groq.
        self.llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768", groq_api_key=groq_api_key)

        # Rolling chat history, exposed to the prompt as "chat_history".
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

        # The system prompt lives in a text file next to the script.
        with open('system_prompt.txt', 'r') as prompt_file:
            base_instructions = prompt_file.read().strip()

        self.prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(base_instructions),
            MessagesPlaceholder(variable_name="chat_history"),
            HumanMessagePromptTemplate.from_template("{text}"),
        ])

        self.conversation = LLMChain(
            llm=self.llm,
            prompt=self.prompt,
            memory=self.memory,
        )

    def process(self, text):
        """Send *text* to the LLM, record both turns in memory, return the reply."""
        self.memory.chat_memory.add_user_message(text)  # record the user turn

        started = time.time()
        result = self.conversation.invoke({"text": text})
        latency_ms = int((time.time() - started) * 1000)

        reply = result['text']
        self.memory.chat_memory.add_ai_message(reply)  # record the AI turn

        print(f"LLM ({latency_ms}ms): {reply}")
        return reply
|
67 |
+
|
68 |
+
class TextToSpeech:
    """Streams Deepgram Aura TTS audio for a text string straight into ffplay."""

    # Deepgram voice model used by speak(); change as needed.
    MODEL_NAME = "aura-helios-en"  # Example model name, change as needed

    def __init__(self, deepgram_api_key):
        # Deepgram API key used to authorize the /v1/speak request.
        self.DG_API_KEY = deepgram_api_key

    @staticmethod
    def is_installed(lib_name: str) -> bool:
        """Return True if *lib_name* resolves to an executable on PATH."""
        return shutil.which(lib_name) is not None

    def speak(self, text):
        """Synthesize *text* and play it as the audio streams in.

        Raises:
            ValueError: If ffplay is not available on PATH.
            requests.HTTPError: If the Deepgram API returns an error status.
        """
        if not self.is_installed("ffplay"):
            raise ValueError("ffplay not found, necessary to stream audio.")

        DEEPGRAM_URL = f"https://api.deepgram.com/v1/speak?model={self.MODEL_NAME}&performance=some&encoding=linear16&sample_rate=24000"
        headers = {
            "Authorization": f"Token {self.DG_API_KEY}",
            "Content-Type": "application/json"
        }
        payload = {
            "text": text
        }

        # ffplay reads raw linear16 audio from stdin; -nodisp/-autoexit keep it headless.
        player_process = subprocess.Popen(
            ["ffplay", "-autoexit", "-", "-nodisp"],
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        start_time = time.time()  # Record the time before sending the request
        first_byte_time = None    # Set when the first audio chunk is received

        try:
            with requests.post(DEEPGRAM_URL, stream=True, headers=headers, json=payload) as r:
                # BUG FIX: fail fast on an HTTP error instead of piping the JSON
                # error body into ffplay as if it were audio.
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        if first_byte_time is None:  # first chunk => report TTFB
                            first_byte_time = time.time()
                            ttfb = int((first_byte_time - start_time) * 1000)
                            print(f"TTS Time to First Byte (TTFB): {ttfb}ms\n")
                        player_process.stdin.write(chunk)
                        player_process.stdin.flush()
        finally:
            # BUG FIX: always close the pipe and reap the child, even when the
            # request raises, so no zombie ffplay process is left behind.
            if player_process.stdin:
                player_process.stdin.close()
            player_process.wait()
|
117 |
+
|
118 |
+
class TranscriptCollector:
    """Accumulates interim transcript fragments until a sentence is final."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard everything collected so far and start a fresh sentence."""
        self.transcript_parts = []

    def add_part(self, part):
        """Buffer one more fragment of the in-progress sentence."""
        self.transcript_parts.append(part)

    def get_full_transcript(self):
        """Join the buffered fragments into one space-separated string."""
        return ' '.join(self.transcript_parts)
|
130 |
+
|
131 |
+
# Module-level collector shared by get_transcript's on_message callback.
transcript_collector = TranscriptCollector()
|
132 |
+
|
133 |
+
async def get_transcript(callback):
    """Open a Deepgram live-transcription socket fed by the default microphone.

    Blocks until one complete, non-empty sentence has been transcribed, passes
    it to *callback*, then tears the connection down and returns.

    Args:
        callback: Callable invoked once with the finished sentence (str).
    """
    transcription_complete = asyncio.Event()  # Event to signal transcription completion

    try:
        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
        config = DeepgramClientOptions(options={"keepalive": "true"})
        # NOTE(review): empty key string — presumably the SDK falls back to the
        # DEEPGRAM_API_KEY environment variable; confirm for the pinned SDK version.
        deepgram: DeepgramClient = DeepgramClient("", config)

        dg_connection = deepgram.listen.asynclive.v("1")
        print ("Listening...")

        async def on_message(self, result, **kwargs):
            # One transcription event; interim fragments arrive until speech_final.
            sentence = result.channel.alternatives[0].transcript

            if not result.speech_final:
                transcript_collector.add_part(sentence)
            else:
                # This is the final part of the current sentence
                transcript_collector.add_part(sentence)
                full_sentence = transcript_collector.get_full_transcript()
                # Check if the full_sentence is not empty before printing
                if len(full_sentence.strip()) > 0:
                    full_sentence = full_sentence.strip()
                    print(f"Human: {full_sentence}")
                    callback(full_sentence)  # Call the callback with the full_sentence
                    transcript_collector.reset()
                    transcription_complete.set()  # Signal to stop transcription and exit

        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)

        options = LiveOptions(
            model="nova-2",
            punctuate=True,
            language="en-US",
            encoding="linear16",  # raw 16-bit PCM from the Microphone helper
            channels=1,
            sample_rate=16000,
            endpointing=300,  # ms of silence that ends an utterance
            smart_format=True,
        )

        await dg_connection.start(options)

        # Open a microphone stream on the default input device
        microphone = Microphone(dg_connection.send)
        microphone.start()

        await transcription_complete.wait()  # Wait for the transcription to complete instead of looping indefinitely

        # Wait for the microphone to close
        microphone.finish()

        # Indicate that we've finished
        await dg_connection.finish()

    except Exception as e:
        print(f"Could not open socket: {e}")
        return
|
191 |
+
|
192 |
+
class ConversationManager:
    """Drives the listen -> LLM -> speak loop until the user says "goodbye".

    Args:
        groq_api_key: Groq key for the LLM; defaults to the GROQ_API_KEY env var.
        deepgram_api_key: Deepgram key for TTS; defaults to DEEPGRAM_API_KEY.
    """

    def __init__(self, groq_api_key=None, deepgram_api_key=None):
        self.transcription_response = ""
        # BUG FIX: LanguageModelProcessor() and TextToSpeech() were called with
        # no arguments although both require an API key -> TypeError at runtime.
        # Keys now come from the (optional, backward-compatible) constructor
        # parameters, falling back to the environment.
        self.deepgram_api_key = deepgram_api_key or os.getenv("DEEPGRAM_API_KEY")
        self.llm = LanguageModelProcessor(groq_api_key=groq_api_key or os.getenv("GROQ_API_KEY"))

    async def main(self):
        """Run the conversation loop: transcribe, respond, speak, repeat."""
        def handle_full_sentence(full_sentence):
            # Stash the finished sentence so the loop below can act on it.
            self.transcription_response = full_sentence

        # Loop indefinitely until "goodbye" is detected
        while True:
            await get_transcript(handle_full_sentence)

            # Check for "goodbye" to exit the loop
            if "goodbye" in self.transcription_response.lower():
                break

            llm_response = self.llm.process(self.transcription_response)

            tts = TextToSpeech(deepgram_api_key=self.deepgram_api_key)
            tts.speak(llm_response)

            # Reset transcription_response for the next loop iteration
            self.transcription_response = ""
|
216 |
+
|
217 |
+
if __name__ == "__main__":
    # Entry point: run the voice-conversation loop until "goodbye" is heard.
    asyncio.run(ConversationManager().main())
|
app.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
from QuickAgent import LanguageModelProcessor, TextToSpeech

# Minimal Streamlit front-end: collects the two API keys and wires up the
# QuickAgent building blocks.
st.title('AI Language Teacher')

# SECURITY FIX: mask the key fields so secrets are not rendered on screen.
groq_api_key = st.text_input('Enter your GROQ_API_KEY', type='password')
deepgram_api_key = st.text_input('Enter your DEEPGRAM_API_KEY', type='password')

if st.button('Start'):
    if groq_api_key and deepgram_api_key:
        processor = LanguageModelProcessor(groq_api_key=groq_api_key)
        tts = TextToSpeech(deepgram_api_key=deepgram_api_key)
        # The logic to process or start the conversation would go here.
        st.write("Processing started")
    else:
        st.write("Please enter both API keys")
|
building_blocks/llm.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.prompts import ChatPromptTemplate
|
2 |
+
from langchain_groq import ChatGroq
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import os
|
5 |
+
|
6 |
+
load_dotenv()
|
7 |
+
|
8 |
+
def batch():
    """Run a single one-shot (non-streaming) chat completion against Groq."""
    model = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768", groq_api_key=os.getenv("GROQ_API_KEY"))

    # System + human template; {text} is filled in at invoke time.
    messages = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant."),
        ("human", "{text}"),
    ])

    pipeline = messages | model
    print(pipeline.invoke({"text": "Explain the importance of low latency LLMs."}))
|
17 |
+
|
18 |
+
# Streaming
def streaming():
    """Stream a long completion token-by-token to stdout."""
    model = ChatGroq(temperature=0, model_name="llama2-70b-4096", groq_api_key=os.getenv("GROQ_API_KEY"))
    pipeline = ChatPromptTemplate.from_messages([("human", "Write a very long poem about {topic}")]) | model
    # Print each chunk as it arrives; flush so output appears immediately.
    for piece in pipeline.stream({"topic": "The Moon"}):
        print(piece.content, end="", flush=True)
|
25 |
+
|
26 |
+
if __name__ == "__main__":
    # BUG FIX: guarded so importing this module no longer fires a live LLM
    # request as a side effect.
    # batch()
    streaming()
|
building_blocks/speech_to_text_streaming.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
from deepgram import (
|
5 |
+
DeepgramClient,
|
6 |
+
DeepgramClientOptions,
|
7 |
+
LiveTranscriptionEvents,
|
8 |
+
LiveOptions,
|
9 |
+
Microphone,
|
10 |
+
)
|
11 |
+
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
class TranscriptCollector:
    """Buffers partial transcript pieces until a full sentence is assembled."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Start over with an empty buffer."""
        self.transcript_parts = []

    def add_part(self, part):
        """Append one transcript fragment to the buffer."""
        self.transcript_parts.append(part)

    def get_full_transcript(self):
        """Return the buffered fragments joined with single spaces."""
        return ' '.join(self.transcript_parts)
|
26 |
+
|
27 |
+
# Shared collector used by the on_message callback inside get_transcript.
transcript_collector = TranscriptCollector()
|
28 |
+
|
29 |
+
async def get_transcript():
    """Stream the default microphone to Deepgram and print live transcripts.

    Runs until the microphone stream stops being active, then shuts the
    websocket down cleanly.
    """
    try:
        config = DeepgramClientOptions(options={"keepalive": "true"})
        # NOTE(review): empty key string — presumably the SDK falls back to the
        # DEEPGRAM_API_KEY environment variable; confirm for the pinned SDK version.
        deepgram: DeepgramClient = DeepgramClient("", config)

        dg_connection = deepgram.listen.asynclive.v("1")

        async def on_message(self, result, **kwargs):
            # print (result)
            sentence = result.channel.alternatives[0].transcript

            print (result)

            if not result.speech_final:
                transcript_collector.add_part(sentence)
            else:
                # This is the final part of the current sentence
                transcript_collector.add_part(sentence)
                full_sentence = transcript_collector.get_full_transcript()
                print(f"speaker: {full_sentence}")
                # Reset the collector for the next sentence
                transcript_collector.reset()

        async def on_error(self, error, **kwargs):
            print(f"\n\n{error}\n\n")

        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
        dg_connection.on(LiveTranscriptionEvents.Error, on_error)

        options = LiveOptions(
            model="nova-2",
            punctuate=True,
            language="en-US",
            encoding="linear16",
            channels=1,
            sample_rate=16000,
            endpointing=True
        )

        await dg_connection.start(options)

        # Open a microphone stream on the default input device
        microphone = Microphone(dg_connection.send)

        # start microphone
        microphone.start()

        # Poll until the microphone stream closes.
        while True:
            if not microphone.is_active():
                break
            await asyncio.sleep(1)

        # Wait for the microphone to close
        microphone.finish()

        # BUG FIX: finish() on the async live client is a coroutine; without
        # `await` it never actually ran and the socket was left open (the
        # QuickAgent.py copy of this function awaits it correctly).
        await dg_connection.finish()

        print("Finished")

    except Exception as e:
        print(f"Could not open socket: {e}")
        return
|
92 |
+
|
93 |
+
# Run the live-transcription demo when executed as a script.
if __name__ == "__main__":
    asyncio.run(get_transcript())
|
building_blocks/text_to_speech.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import subprocess
|
5 |
+
import shutil
|
6 |
+
import time
|
7 |
+
from deepgram import Deepgram
|
8 |
+
|
9 |
+
# brew install portaudio
|
10 |
+
|
11 |
+
# Load environment variables
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
# Set your Deepgram API Key and desired voice model
|
15 |
+
DG_API_KEY = os.getenv("DEEPGRAM_API_KEY")
|
16 |
+
MODEL_NAME = "alpha-stella-en-v2" # Example model name, change as needed
|
17 |
+
|
18 |
+
def is_installed(lib_name: str) -> bool:
    """Return True if *lib_name* resolves to an executable on PATH."""
    return shutil.which(lib_name) is not None
|
21 |
+
|
22 |
+
def play_stream(audio_stream, use_ffmpeg=True):
    """Pipe an iterable of raw audio chunks into ffplay for playback.

    Args:
        audio_stream: Iterable yielding bytes chunks of audio data.
        use_ffmpeg: Unused; kept for interface compatibility.

    Raises:
        ValueError: If ffplay is not available on PATH.
    """
    player = "ffplay"
    if not is_installed(player):
        raise ValueError(f"{player} not found, necessary to stream audio.")

    # Headless ffplay reading raw audio from stdin.
    proc = subprocess.Popen(
        ["ffplay", "-autoexit", "-", "-nodisp"],
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    for block in audio_stream:
        if block:
            proc.stdin.write(block)  # type: ignore
            proc.stdin.flush()  # type: ignore

    # Signal end-of-stream, then wait for playback to drain.
    if proc.stdin:
        proc.stdin.close()
    proc.wait()
|
43 |
+
|
44 |
+
def send_tts_request(text):
    """Request Deepgram TTS for *text* and play the audio as it streams.

    Starts ffplay before sending the request so playback can begin on the
    first received chunk, then streams the HTTP body straight into its stdin.

    Raises:
        ValueError: If ffplay is not available on PATH.
        requests.HTTPError: If the Deepgram API returns an error status.
    """
    DEEPGRAM_URL = f"https://api.beta.deepgram.com/v1/speak?model={MODEL_NAME}&performance=some&encoding=linear16&sample_rate=24000"

    headers = {
        "Authorization": f"Token {DG_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "text": text,
        "voice": MODEL_NAME
    }

    # Initialize the player process here, before receiving the stream
    player = "ffplay"
    if not is_installed(player):
        raise ValueError(f"{player} not found, necessary to stream audio.")

    player_process = subprocess.Popen(
        ["ffplay", "-autoexit", "-", "-nodisp"],
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # BUG FIX: start_time and first_byte_time were initialized twice (before
    # and after spawning the player); the single init belongs right before the
    # request so TTFB is measured from send time.
    start_time = time.time()  # Record the time before sending the request
    first_byte_time = None    # Set when the first audio byte is received

    with requests.post(DEEPGRAM_URL, stream=True, headers=headers, json=payload) as r:
        # BUG FIX: surface HTTP errors instead of piping a JSON error body
        # into ffplay as if it were audio.
        r.raise_for_status()

        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                if first_byte_time is None:  # first chunk => report TTFB
                    first_byte_time = time.time()
                    ttfb = int((first_byte_time - start_time) * 1000)
                    print(f"Time to First Byte (TTFB): {ttfb}ms")
                # Write each chunk to the player's stdin immediately
                player_process.stdin.write(chunk)  # type: ignore
                player_process.stdin.flush()  # type: ignore

    # Close the player's stdin and wait for the process to finish
    if player_process.stdin:
        player_process.stdin.close()
    player_process.wait()
|
94 |
+
|
95 |
+
# Example usage with saving to file
text = """
The returns for performance are superlinear."""

if __name__ == "__main__":
    # BUG FIX: guarded so importing this module no longer fires a network
    # TTS request and launches ffplay as a side effect.
    send_tts_request(text)
|
requirements.txt
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.9.3
|
2 |
+
aiosignal==1.3.1
|
3 |
+
annotated-types==0.6.0
|
4 |
+
anyio==4.3.0
|
5 |
+
appnope==0.1.4
|
6 |
+
asttokens==2.4.1
|
7 |
+
asyncio==3.4.3
|
8 |
+
attrs==23.2.0
|
9 |
+
certifi==2024.2.2
|
10 |
+
cffi==1.16.0
|
11 |
+
charset-normalizer==3.3.2
|
12 |
+
comm==0.2.1
|
13 |
+
dataclasses-json==0.6.4
|
14 |
+
debugpy==1.8.1
|
15 |
+
decorator==5.1.1
|
16 |
+
deepgram-sdk==3.1.4
|
17 |
+
distro==1.9.0
|
18 |
+
executing==2.0.1
|
19 |
+
frozenlist==1.4.1
|
20 |
+
gevent==24.2.1
|
21 |
+
greenlet==3.0.3
|
22 |
+
groq==0.4.1
|
23 |
+
h11==0.14.0
|
24 |
+
httpcore==1.0.4
|
25 |
+
httpx==0.27.0
|
26 |
+
idna==3.6
|
27 |
+
ipykernel==6.29.2
|
28 |
+
ipython==8.22.1
|
29 |
+
jedi==0.19.1
|
30 |
+
jsonpatch==1.33
|
31 |
+
jsonpointer==2.4
|
32 |
+
jupyter_client==8.6.0
|
33 |
+
jupyter_core==5.7.1
|
34 |
+
langchain==0.1.9
|
35 |
+
langchain-community==0.0.24
|
36 |
+
langchain-core==0.1.26
|
37 |
+
langchain-groq==0.0.1
|
38 |
+
langchain-openai==0.0.7
|
39 |
+
langsmith==0.1.6
|
40 |
+
marshmallow==3.20.2
|
41 |
+
matplotlib-inline==0.1.6
|
42 |
+
multidict==6.0.5
|
43 |
+
mypy-extensions==1.0.0
|
44 |
+
nest-asyncio==1.6.0
|
45 |
+
numpy==1.26.4
|
46 |
+
openai==1.12.0
|
47 |
+
orjson==3.9.15
|
48 |
+
packaging==23.2
|
49 |
+
parso==0.8.3
|
50 |
+
pexpect==4.9.0
|
51 |
+
platformdirs==4.2.0
|
52 |
+
prompt-toolkit==3.0.43
|
53 |
+
psutil==5.9.8
|
54 |
+
ptyprocess==0.7.0
|
55 |
+
pure-eval==0.2.2
|
56 |
+
PyAudio==0.2.14
|
57 |
+
pycparser==2.21
|
58 |
+
pydantic==2.6.2
|
59 |
+
pydantic_core==2.16.3
|
60 |
+
pygame==2.5.2
|
61 |
+
Pygments==2.17.2
|
62 |
+
python-dateutil==2.8.2
|
63 |
+
python-dotenv==1.0.1
|
64 |
+
PyYAML==6.0.1
|
65 |
+
pyzmq==25.1.2
|
66 |
+
regex==2023.12.25
|
67 |
+
requests==2.31.0
|
68 |
+
six==1.16.0
|
69 |
+
sniffio==1.3.0
|
70 |
+
sounddevice==0.4.6
|
71 |
+
SQLAlchemy==2.0.27
|
72 |
+
stack-data==0.6.3
|
73 |
+
tenacity==8.2.3
|
74 |
+
tiktoken==0.6.0
|
75 |
+
tornado==6.4
|
76 |
+
tqdm==4.66.2
|
77 |
+
traitlets==5.14.1
|
78 |
+
typing-inspect==0.9.0
|
79 |
+
typing_extensions==4.9.0
|
80 |
+
urllib3==2.2.1
|
81 |
+
verboselogs==1.7
|
82 |
+
wcwidth==0.2.13
|
83 |
+
websocket==0.2.1
|
84 |
+
websocket-client==1.7.0
|
85 |
+
websockets==12.0
|
86 |
+
yarl==1.9.4
|
87 |
+
zope.event==5.0
|
88 |
+
zope.interface==6.2
|
89 |
+
streamlit
|
system_prompt.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are a conversational teacher language assistant named Eliza.
|
2 |
+
Use short, conversational responses as if you're having a live conversation.
|
3 |
+
Your response should be under 20 words.
|
4 |
+
Do not respond with any code, only conversation.
|