# Hugging Face Space: Gradio chat UI over a llama.cpp GGUF model.
import json
import logging
from threading import Lock
from typing import cast

import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from conversation import get_default_conv_template
# Quantized Starling-LM-7B GGUF file, expected in the working directory.
# NOTE(review): an earlier revision fetched a MiniChat-3B GGUF instead:
#   hf_hub_download(repo_id="afrideva/MiniChat-3B-GGUF", filename=model_path)
model_path = "starling-lm-7b-alpha.Q6_K.gguf"
lcpp_model = Llama(model_path=model_path)

# Accumulator for the streamed response text, shared with m3b_talk().
otxt = ""
def m3b_talk(text):
    """Stream a chat completion for *text*, yielding the response so far.

    Generator used as a Gradio streaming callback: each ``yield`` replaces
    the output textbox content with everything generated up to that point.

    Parameters:
        text: the user's message (plain string from the input textbox).

    Yields:
        The accumulated assistant reply after each streamed token.
    """
    global otxt
    # Reset the shared accumulator per request; previously replies from
    # successive calls were concatenated together.
    otxt = ""
    # OpenChat/Starling turn template. <|end_of_turn|> is the turn
    # separator for this model family (the original used <|end_of_text|>,
    # which this template does not treat as a turn boundary).
    formatted_query = "GPT4 User: " + text + "<|end_of_turn|>GPT4 Assistant:"
    # echo=False: the prompt is not returned in the stream, so no post-hoc
    # stripping is needed. (The old echo=True path tried to remove the
    # prompt with otxt.replace(...) but discarded the result — str.replace
    # returns a new string — so the prompt leaked into the output.)
    stream = lcpp_model(
        formatted_query,
        stop=["GPT4 User:", "\n\n\n"],
        echo=False,
        stream=True,
    )
    for chunk in stream:
        otxt += chunk["choices"][0]["text"]
        yield otxt
    return otxt
def main():
    """Build and launch the Gradio demo for the loaded Starling-LM model.

    Wires a message textbox and a Send button to the streaming m3b_talk()
    callback, then starts the queued Gradio server (queueing is required
    for generator/streaming outputs).
    """
    logging.basicConfig(level=logging.INFO)
    with gr.Blocks() as demo:
        with gr.Row(variant="panel"):
            # UI previously advertised "MiniChat-3B"; the model actually
            # loaded is Starling-LM-7B, so the heading now matches.
            gr.Markdown("## Talk to Starling-LM-7B\n\nTalk to Starling-LM-7B.")
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                m3b_talk_input = gr.Textbox(
                    label="Message", placeholder="Type something here..."
                )
            with gr.Column(variant="panel"):
                m3b_talk_output = gr.Textbox(label="Response")
            m3b_talk_btn = gr.Button("Send")
        # Event listeners must be registered inside the Blocks context.
        m3b_talk_btn.click(
            m3b_talk,
            inputs=m3b_talk_input,
            outputs=m3b_talk_output,
            api_name="talk_m3b",
        )
    demo.queue().launch()


if __name__ == "__main__":
    main()