import torch
import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# accelerate and bitsandbytes must be installed for device_map="auto" and
# load_in_4bit below, but neither needs to be imported directly.

peft_model_id = "OS07/Letsee"

# Resolve the base checkpoint from the adapter config and load it 4-bit
# quantized, spread across the available devices.
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_4bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(model, peft_model_id)

# Build the generation pipeline once at startup, reusing the model and
# tokenizer loaded above instead of reloading the checkpoint on every request.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def generated_output_filtering(output):
    """Drop the echoed prompt and return only the assistant's reply."""
    if len(output) > 0:
        text = output[0]["generated_text"]
        marker = "<|assistant|>"
        if marker in text:
            return text[text.find(marker) + len(marker):]
    return None
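
# For reference, pipe() returns a list of dicts keyed by "generated_text";
# with a hypothetical query it looks roughly like
#   [{"generated_text": "<|system|>\n<|end|>\n<|user|>\nHi<|end|>\n<|assistant|>Hello!"}]
# from which generated_output_filtering() extracts "Hello!".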

def get_result(query):
    # Chat prompt template with system / user / assistant turns
    prompt_template = "<|system|>\n<|end|>\n<|user|>\n{query}<|end|>\n<|assistant|>"
    prompt = prompt_template.format(query=query)
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.2,
        top_k=50,
        top_p=0.95,
        eos_token_id=49155,  # presumably the id of the <|end|> token, so generation stops there
    )
    if outputs:
        return generated_output_filtering(outputs)
    return None

iface = gr.Interface(fn=get_result, inputs="text", outputs="text")
iface.launch()
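
# Note: launch() serves on localhost by default; when running this script
# outside a hosted Space, iface.launch(share=True) would additionally create
# a temporary public link (a standard Gradio option, noted here as a usage hint).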