# MiniChat-3B / app.py
# Samuel L Meyers
# m3b
# fe36794
# raw
# history blame
# 1.98 kB
import logging
import os
from threading import Lock
from typing import cast
from urllib import error, parse, request, response, robotparser
from urllib.request import urlopen

import gradio as gr
import torch
from pyllamacpp.model import Model
from transformers import AutoModelForCausalLM, AutoTokenizer

from conversation import get_default_conv_template
"""
Reference only (never executed): minimal pyllamacpp console chat loop.

model = Model(model_path='/path/to/model.bin')
while True:
    try:
        prompt = input("You: ")
        if prompt == '':
            continue
        print(f"AI:", end='')
        for token in model.generate(prompt):
            print(f"{token}", end='', flush=True)
        print()
    except KeyboardInterrupt:
        break
"""
# Registry of chat back-ends keyed by a short model name.  Building it loads
# the Hugging Face tokenizer and model at import time.
# NOTE(review): nothing else in the visible part of this file reads
# `talkers` -- confirm it is still needed before paying for the load.
_MINICHAT_REPO = "GeneZC/MiniChat-3B"

_m3b_entry = {
    "tokenizer": AutoTokenizer.from_pretrained(_MINICHAT_REPO, use_fast=False),
    "model": AutoModelForCausalLM.from_pretrained(
        _MINICHAT_REPO,
        device_map="auto",
        low_cpu_mem_usage=True,
    ),
    # Conversation template comes from the local `conversation` module.
    "conv": get_default_conv_template("minichat"),
}

talkers = {"m3b": _m3b_entry}
# Quantized MiniChat-3B weights consumed by pyllamacpp.
_LCPP_MODEL_URL = "https://huggingface.co/GGUF/MiniChat-3B/resolve/main/ggml-model-q8_0.bin"
_LCPP_MODEL_PATH = "minichat-3b-q8_0.gguf"

# Only fetch the weights when they are not on disk yet; downloading the whole
# file again on every start is slow and wasteful.
if not os.path.exists(_LCPP_MODEL_PATH):
    # Download under a temporary name and rename on success, so an
    # interrupted transfer never leaves a truncated file at the final path.
    _partial_path = _LCPP_MODEL_PATH + ".part"
    request.urlretrieve(_LCPP_MODEL_URL, _partial_path)
    os.replace(_partial_path, _LCPP_MODEL_PATH)

# Shared model instance used by m3b_talk().  Delete the local weights file to
# force a fresh download on the next start.
lcpp_model = Model(model_path=_LCPP_MODEL_PATH)
def m3b_talk(text):
    """Return the model's reply to a single user message.

    Args:
        text: Prompt passed to ``lcpp_model.generate``.

    Returns:
        The generated pieces joined into one string, or ``""`` when
        ``text`` is empty, ``None`` or contains only whitespace.
    """
    # An empty prompt gives the model nothing to answer, so skip the call.
    if not text or not text.strip():
        return ""
    # generate() yields the reply piece by piece; join once instead of
    # growing a string with repeated ``+=``.
    return "".join(lcpp_model.generate(text))
def main():
    """Build the Gradio chat page for MiniChat-3B and launch it.

    The page has a heading row and a second row with two columns: the
    message box on one side, the reply box and "Send" button on the other.
    Clicking the button calls ``m3b_talk`` with the message and shows the
    returned text in the reply box.
    """
    logging.basicConfig(level=logging.INFO)

    panel = "panel"
    with gr.Blocks() as demo:
        # Heading.
        with gr.Row(variant=panel):
            gr.Markdown("## Talk to MiniChat-3B\n\nTalk to MiniChat-3B.")

        # Input on the left, output and trigger on the right.
        with gr.Row(variant=panel):
            with gr.Column(variant=panel):
                prompt_box = gr.Textbox(
                    label="Message",
                    placeholder="Type something here...",
                )
            with gr.Column(variant=panel):
                reply_box = gr.Textbox()
                send_button = gr.Button("Send")

        # Event wiring has to happen while the Blocks context is open.
        send_button.click(
            m3b_talk,
            inputs=prompt_box,
            outputs=reply_box,
            api_name="talk_m3b",
        )

    # NOTE(review): `concurrency_count` is presumably there so only one
    # generation runs at a time -- confirm the installed Gradio version
    # still accepts this argument.
    queued = demo.queue(concurrency_count=1)
    queued.launch()


if __name__ == "__main__":
    main()