File size: 1,982 Bytes
53f8a32
 
f4fe081
da8a172
d487976
 
 
da8a172
fe36794
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc422ae
d487976
 
 
e71462a
d487976
 
 
 
fe36794
 
 
 
d487976
fe36794
 
 
 
d487976
53f8a32
 
 
 
d487976
 
 
 
 
 
 
 
53f8a32
d487976
53f8a32
4263bcd
53f8a32
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import logging
from typing import cast
from threading import Lock
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

from conversation import get_default_conv_template
import gradio as gr
from pyllamacpp.model import Model
from urllib import request, response, urlopen, error, parse, robotparser

"""

model = Model(model_path='/path/to/model.bin')
while True:
    try:
        prompt = input("You: ", flush=True)
        if prompt == '':
            continue
        print(f"AI:", end='')
        for token in model.generate(prompt):
            print(f"{token}", end='', flush=True)
        print()
    except KeyboardInterrupt:
        break
"""

talkers = {
    "m3b": {
        "tokenizer": AutoTokenizer.from_pretrained("GeneZC/MiniChat-3B", use_fast=False),
        "model": AutoModelForCausalLM.from_pretrained("GeneZC/MiniChat-3B", device_map="auto", low_cpu_mem_usage=True),
        "conv": get_default_conv_template("minichat")
    }
}

request.urlretrieve("https://huggingface.co/GGUF/MiniChat-3B/resolve/main/ggml-model-q8_0.bin", "minichat-3b-q8_0.gguf")

lcpp_model = Model(model_path="minichat-3b-q8_0.gguf")

def m3b_talk(text):
    resp = ""
    for token in lcpp_model.generate(text):
        resp += token
    return resp

def main():
    logging.basicConfig(level=logging.INFO)

    with gr.Blocks() as demo:
        with gr.Row(variant="panel"):
            gr.Markdown("## Talk to MiniChat-3B\n\nTalk to MiniChat-3B.")
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                m3b_talk_input = gr.Textbox(label="Message", placeholder="Type something here...")
            with gr.Column(variant="panel"):
                m3b_talk_output = gr.Textbox()
                m3b_talk_btn = gr.Button("Send")

        m3b_talk_btn.click(m3b_talk, inputs=m3b_talk_input, outputs=m3b_talk_output, api_name="talk_m3b")

    demo.queue(concurrency_count=1).launch()


if __name__ == "__main__":
    main()