# MiniChat-3B / app.py — Hugging Face Space entry point
# Author: Samuel L Meyers
# (HF viewer residue: commit "print the response", 0453504; raw / history / blame; 1.86 kB)
import logging
from typing import cast
from threading import Lock
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from conversation import get_default_conv_template
import gradio as gr
from llama_cpp import Llama
import json
"""
model = Model(model_path='/path/to/model.bin')
while True:
try:
prompt = input("You: ", flush=True)
if prompt == '':
continue
print(f"AI:", end='')
for token in model.generate(prompt):
print(f"{token}", end='', flush=True)
print()
except KeyboardInterrupt:
break
"""
from huggingface_hub import hf_hub_download
# Quantized (q8_0) GGUF build of MiniChat-3B, served through llama.cpp.
model_path = "minichat-3b.q8_0.gguf"
# Download the model file from the Hub (or reuse the local cache) and get its path.
mdlpath = hf_hub_download(repo_id="afrideva/MiniChat-3B-GGUF", filename=model_path)
# Single module-level Llama instance shared by every request handled by this app.
lcpp_model = Llama(model_path=mdlpath)
def m3b_talk(text):
    """Generate a reply from MiniChat-3B for a single user message.

    Args:
        text: The raw user message.

    Returns:
        The model's reply text with the prompt scaffolding stripped out.
    """
    formatted_query = "<s> [|User|]" + text + "</s> [|Assistant|]"
    # Without stream=True, llama_cpp's __call__ returns one OpenAI-style
    # completion dict. The original code iterated over that dict (which yields
    # its *keys*) and concatenated them, then printed `jsn` one line before it
    # was assigned (a guaranteed NameError). Use the returned dict directly.
    result = lcpp_model(formatted_query, stop=["[|User|]", "\n"], echo=True)
    print(result)
    # echo=True makes the completion include the prompt; remove it so only the
    # assistant's answer is returned to the UI.
    answer = result["choices"][0]["text"].replace(formatted_query, "")
    return answer
def main():
    """Build the Gradio UI for MiniChat-3B and launch it."""
    logging.basicConfig(level=logging.INFO)
    with gr.Blocks() as demo:
        # Header row.
        with gr.Row(variant="panel"):
            gr.Markdown("## Talk to MiniChat-3B\n\nTalk to MiniChat-3B.")
        # Input on the left, model reply and send button on the right.
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                message_box = gr.Textbox(label="Message", placeholder="Type something here...")
            with gr.Column(variant="panel"):
                reply_box = gr.Textbox()
                send_button = gr.Button("Send")
        send_button.click(m3b_talk, inputs=message_box, outputs=reply_box, api_name="talk_m3b")
    # Serialize requests: the single Llama instance is not safe for concurrent calls.
    demo.queue(concurrency_count=1).launch()
if __name__ == "__main__":
main()