File size: 1,149 Bytes
e27c8af c90040d e27c8af 31def74 c90040d 31def74 c90040d 31def74 e27c8af c90040d 31def74 e27c8af c90040d 31def74 c90040d 31def74 e27c8af 31def74 e27c8af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from peft import PeftModel
# Hub identifiers: the original base checkpoint and the fine-tuned LoRA
# adapter that is applied on top of it.
BASE_MODEL = "facebook/blenderbot-400M-distill"
ADAPTER_REPO = "abinashnp/bayedger-chatbot"

# Step 1: tokenizer and seq2seq weights both come from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)

# Step 2: wrap the base model with the adapter loaded from ADAPTER_REPO.
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)

# Step 3: expose the adapted model as a text2text-generation pipeline.
# No explicit `device` argument is passed because `device_map="auto"` is used.
chatbot = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
)
def respond(query: str) -> str:
    """Generate the chatbot's answer for a single user question.

    The question is wrapped in the ``question: ... answer:`` prompt and
    passed to the module-level ``chatbot`` pipeline.

    Args:
        query: Text entered by the user.

    Returns:
        The generated answer with surrounding whitespace removed, or an
        empty string when ``query`` is empty, ``None`` or whitespace-only.
    """
    # Nothing to answer: skip generation entirely for blank submissions.
    if not query or not query.strip():
        return ""

    results = chatbot(
        f"question: {query} answer:",
        max_new_tokens=150,
        # temperature/top_p are sampling-only settings; without
        # do_sample=True decoding is greedy and they have no effect.
        do_sample=True,
        temperature=1.0,
        top_p=0.9,
        repetition_penalty=1.1,
        num_beams=1,
    )
    # The pipeline returns a list with one dict per generated sequence.
    return results[0]["generated_text"].strip()
# Build the page: a heading, one input textbox and one output textbox.
demo = gr.Blocks()
with demo:
    gr.Markdown("# 🤖 Bayedger FAQ Chatbot")
    txt = gr.Textbox(label="Ask me anything")
    out = gr.Textbox(label="Answer")
    # On submit of the question box, call `respond` with its text and
    # write the returned string into the answer box.
    txt.submit(fn=respond, inputs=txt, outputs=out)

# Start serving the interface.
demo.launch()
|