|
import gradio |
|
from transformers import pipeline |
|
import torch |
|
import accelerate |
|
import bitsandbytes |
|
|
|
from peft import PeftModel, PeftConfig |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
|
# Hugging Face Hub id of the PEFT (LoRA) adapter to load.
peft_model_id = "OS07/Letsee"

# The adapter's config records which base model it was fine-tuned from.
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model quantized to 4-bit (bitsandbytes) and let accelerate
# place it across available devices.
# NOTE(review): the `load_in_4bit=True` kwarg is deprecated in newer
# transformers releases in favor of `quantization_config=BitsAndBytesConfig(...)`
# — confirm against the installed transformers version.
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_4bit=True, device_map='auto')

# Tokenizer matching the base model.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)




# Attach the fine-tuned adapter weights on top of the quantized base model.
# NOTE(review): `model` and `tokenizer` are never used by get_result below,
# which builds its own pipeline from the hub id — verify whether this
# module-level load is actually needed.
model = PeftModel.from_pretrained(model, peft_model_id)
|
|
|
def generated_output_filtering(output):
    """Extract the assistant's reply from a text-generation pipeline output.

    Parameters
    ----------
    output : list[dict]
        Pipeline output; the values of the first element are stringified
        and everything after the ``|assistant|>`` marker is returned.

    Returns
    -------
    str or None
        Text following the marker, or ``None`` when the output is empty
        or the marker is absent.
    """
    marker = '|assistant|>'
    if not output:
        return None
    text = str(list(output[0].values()))
    idx = text.find(marker)
    # The original checked `'assistant' in str1` but sliced from
    # find('|assistant|'); when the word appeared without the marker,
    # find() returned -1 and the slice produced garbage (str1[11:]).
    # Guard on the actual marker instead.
    if idx == -1:
        return None
    return text[idx + len(marker):]
|
|
|
# Lazily-created generation pipeline, shared across calls so the model is
# loaded from the hub only once instead of on every single request.
_PIPE = None


def get_result(query):
    """Generate a response for *query* using the OS07/Letsee model.

    Parameters
    ----------
    query : str
        The user's prompt, inserted into the chat template.

    Returns
    -------
    str or None
        The text produced after the assistant marker, or ``None`` when the
        pipeline returned no output.
    """
    global _PIPE
    if _PIPE is None:
        # NOTE(review): this loads "OS07/Letsee" directly from the hub and
        # ignores the 4-bit PEFT model/tokenizer already built at module
        # level — consider passing model=model, tokenizer=tokenizer instead;
        # verify before changing.
        _PIPE = pipeline("text-generation", model="OS07/Letsee",
                         torch_dtype=torch.bfloat16, device_map="auto")
    prompt_template = "<|system|>\n<|end|>\n<|user|>\n{query}<|end|>\n<|assistant|>"
    prompt = prompt_template.format(query=query)
    outputs = _PIPE(prompt, max_new_tokens=256, do_sample=True, temperature=0.2,
                    top_k=50, top_p=0.95, eos_token_id=49155)
    if outputs:
        return generated_output_filtering(outputs)
    return None
|
|
|
|
|
|
|
|
|
|
|
# Build and serve a minimal text-in/text-out UI.
# BUG FIX: the original referenced `gr.Interface`, but the module is
# imported as `gradio` (see the imports at the top of the file), so the
# script crashed with NameError before the UI ever launched.
iface = gradio.Interface(fn=get_result, inputs="text", outputs="text")

iface.launch()