Samuel L Meyers committed
Commit d487976 · 1 Parent(s): 06ae9a8
Files changed (1): app.py +36 -1
app.py CHANGED
@@ -9,11 +9,14 @@ import glob
 import logging
 from typing import cast
 from threading import Lock
-from transformers import pipeline
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 
 import gradio as gr
 from balacoon_tts import TTS
 from huggingface_hub import hf_hub_download, list_repo_files
+import torch
+
+from conversation import get_default_conv_template
 
 # locker that disallows access to the tts object from more than one thread
 locker = Lock()
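Note: conversation is not a transformers module. get_default_conv_template("minichat") matches the helper shipped in the GeneZC/MiniChat repository's conversation.py, so that file presumably has to sit next to app.py in the Space for this import to resolve.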
@@ -37,12 +40,35 @@ stt_pipe = pipeline(
     model="openai/whisper-large-v3",
 )
 
+talkers = {
+    "m3b": {
+        "tokenizer": AutoTokenizer.from_pretrained("GeneZC/MiniChat-3B", use_fast=False),
+        "model": AutoModelForCausalLM.from_pretrained("GeneZC/MiniChat-3B", device_map="auto"),
+        "conv": get_default_conv_template("minichat")
+    }
+}
+
 def transcribe_stt(audio):
     if audio is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
     text = stt_pipe(audio, generate_kwargs={"language": "english", "task": "transcribe"})["text"]
     return text
 
+def m3b_talk(text):
+    m3bconv = talkers["m3b"]["conv"]
+    m3bconv.append_message(m3bconv.roles[0], text)
+    m3bconv.append_message(m3bconv.roles[1], None)
+    input_ids = talkers["m3b"]["tokenizer"]([text]).input_ids
+    response_tokens = talkers["m3b"]["model"](
+        torch.as_tensor(m3bconv.get_prompt()),
+        do_sample=True,
+        temperature=0.2,
+        max_new_tokens=1024,
+    )
+    response_tokens = response_tokens[0][len(input_ids[0]):]
+    response = talkers["m3b"]["tokenizer"].decode(response_tokens, skip_special_tokens=True).strip()
+    return response
+
 def main():
     logging.basicConfig(level=logging.INFO)
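As committed, m3b_talk tokenizes only the raw user text, then passes the templated prompt string itself through torch.as_tensor into the model's forward call, which would raise at runtime (forward takes tensors and returns logits, not sampled tokens). A minimal sketch of a version that would run, keeping the commit's sampling settings; the name m3b_talk_fixed and the device handling are editor assumptions, not part of the commit:

def m3b_talk_fixed(text):
    # Build the MiniChat conversation prompt, as in the commit.
    m3bconv = talkers["m3b"]["conv"]
    m3bconv.append_message(m3bconv.roles[0], text)
    m3bconv.append_message(m3bconv.roles[1], None)
    tokenizer = talkers["m3b"]["tokenizer"]
    model = talkers["m3b"]["model"]
    # Encode the full templated prompt (not the bare user text) as tensors.
    input_ids = tokenizer([m3bconv.get_prompt()], return_tensors="pt").input_ids.to(model.device)
    # Sample a completion with generate() rather than calling the forward pass.
    output_ids = model.generate(
        input_ids,
        do_sample=True,
        temperature=0.2,
        max_new_tokens=1024,
    )
    # Keep only the newly generated tokens and decode them.
    response_tokens = output_ids[0][input_ids.shape[1]:]
    return tokenizer.decode(response_tokens, skip_special_tokens=True).strip()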
 
@@ -116,6 +142,14 @@ def main():
             with gr.Column(variant="panel"):
                 stt_transcribe_output = gr.Textbox()
                 stt_transcribe_btn = gr.Button("Transcribe")
+        with gr.Row(variant="panel"):
+            gr.Markdown("## Talk to MiniChat-3B\n\nTalk to MiniChat-3B.")
+        with gr.Row(variant="panel"):
+            with gr.Column(variant="panel"):
+                m3b_talk_input = gr.Textbox(label="Message", placeholder="Type something here...")
+            with gr.Column(variant="panel"):
+                m3b_talk_output = gr.Textbox()
+                m3b_talk_btn = gr.Button("Send")
 
     def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
         """
@@ -144,6 +178,7 @@ def main():
 
     generate.click(synthesize_audio, inputs=[text, model_name, speaker], outputs=audio, api_name="synthesize")
     stt_transcribe_btn.click(transcribe_stt, inputs=stt_input_file, outputs=stt_transcribe_output, api_name="transcribe")
+    m3b_talk_btn.click(m3b_talk, inputs=m3b_talk_input, outputs=m3b_talk_output, api_name="talk_m3b")
 
     demo.queue(concurrency_count=1).launch()
 
 
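Since each click handler is registered with an api_name, the Space exposes named endpoints that can be called programmatically. A quick sketch using gradio_client; the Space identifier is a placeholder, not taken from the commit:

from gradio_client import Client

# Placeholder Space id: substitute the actual owner/name of this Space.
client = Client("<user>/<space>")
reply = client.predict("Hello there!", api_name="/talk_m3b")
print(reply)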