File size: 1,149 Bytes
e27c8af
c90040d
 
e27c8af
31def74
c90040d
 
 
 
31def74
c90040d
 
 
31def74
e27c8af
 
c90040d
 
31def74
e27c8af
 
 
c90040d
31def74
 
 
 
 
 
c90040d
31def74
e27c8af
 
31def74
 
e27c8af
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# 1) Load the original base model & tokenizer
BASE_MODEL = "facebook/blenderbot-400M-distill"
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)

# 2) Load the fine-tuned LoRA adapter on top of the base weights
ADAPTER_REPO = "abinashnp/bayedger-chatbot"
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)

# 3) Wrap the adapted model in a text2text pipeline.
# `device_map` is only forwarded to `from_pretrained` when the pipeline loads
# the checkpoint itself from a model id; for an already-instantiated model it
# has no effect. The target device is therefore chosen explicitly:
# the first CUDA GPU when one is available, otherwise the CPU (-1).
DEVICE = 0 if torch.cuda.is_available() else -1
chatbot = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=DEVICE,
)

def respond(query):
    """Generate the chatbot's answer for a single user question.

    Args:
        query: Text typed by the user; may be empty or ``None``.

    Returns:
        The generated answer text, or an empty string when ``query`` contains
        no non-whitespace characters (the model is not invoked in that case).
    """
    question = (query or "").strip()
    if not question:
        # Nothing to answer: skip generation instead of prompting the model
        # with an empty question.
        return ""

    result = chatbot(
        f"question: {question} answer:",
        max_new_tokens=150,
        # temperature/top_p only take effect when sampling is enabled; without
        # do_sample=True generation is greedy and these settings are ignored.
        do_sample=True,
        temperature=1.0,
        top_p=0.9,
        repetition_penalty=1.1,
        num_beams=1,
    )
    return result[0]["generated_text"]

# Minimal single-turn UI: one question box in, one answer box out.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Bayedger FAQ Chatbot")
    question_box = gr.Textbox(label="Ask me anything")
    answer_box = gr.Textbox(label="Answer")
    # On submit, pass the question text to `respond` and show its return
    # value in the answer box.
    question_box.submit(fn=respond, inputs=question_box, outputs=answer_box)

demo.launch()