# app.py — Gradio Space serving the OS07/Letsee PEFT fine-tuned chat model.
# Stdlib-free dependency block. Bug fix: the file calls `gr.Interface(...)`
# below, but gradio was imported without the `gr` alias, which raises
# NameError at startup. Import it under both names so either spelling works.
import gradio
import gradio as gr

import torch
import accelerate      # required by device_map="auto" at load time
import bitsandbytes    # required by load_in_4bit=True at load time

from peft import PeftModel, PeftConfig
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hub id of the PEFT/LoRA adapter; its config points at the base model.
peft_model_id = "OS07/Letsee"
config = PeftConfig.from_pretrained(peft_model_id)
# Load the base causal-LM in 4-bit (bitsandbytes) and shard it automatically
# across available devices (accelerate).
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_4bit=True, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Load the Lora model
# NOTE(review): this wraps the base model with the LoRA adapter, but
# `get_result` below builds its own pipeline from the hub id and never uses
# this `model`/`tokenizer` pair — confirm whether this load is needed at all.
model = PeftModel.from_pretrained(model, peft_model_id)
def generated_output_filtering(output):
    """Extract the assistant's reply from a text-generation pipeline output.

    Parameters
    ----------
    output : list[dict]
        Pipeline result, e.g. ``[{"generated_text": "...<|assistant|>reply"}]``.

    Returns
    -------
    str | None
        The text following the ``<|assistant|>`` marker, or ``None`` when the
        output is empty or the marker is absent.

    Bug fixes vs. the original:
    - ``str(list(output[0].values()))`` leaked ``repr`` artifacts into the
      user-visible reply (a trailing ``']`` and newlines rendered as a
      literal backslash-n); we now take the generated string directly.
    - The marker offset mixed ``find('|assistant|')`` with
      ``len('|assistant|>')``; we now search for the full ``<|assistant|>``
      token and skip exactly its length.
    - The missing-marker branch now returns ``None`` explicitly instead of
      falling off the end of the function.
    """
    if not output:
        return None
    # Key-agnostic like the original: use the first value of the result dict.
    text = str(next(iter(output[0].values())))
    marker = '<|assistant|>'
    idx = text.find(marker)
    if idx == -1:
        return None
    return text[idx + len(marker):]
def get_result(query):
    """Generate a chat reply for *query* with the OS07/Letsee model.

    Parameters
    ----------
    query : str
        The user's message, injected into the StarChat-style prompt template.

    Returns
    -------
    str | None
        The filtered assistant reply, or ``None`` when generation produced
        no output.

    Performance fix: the original rebuilt the whole text-generation pipeline
    (re-loading the model from the Hub) on EVERY request. The pipeline is now
    created once on first call and cached on the function object.
    """
    pipe = getattr(get_result, "_pipe", None)
    if pipe is None:
        pipe = pipeline(
            "text-generation",
            model="OS07/Letsee",
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        get_result._pipe = pipe  # cache for subsequent requests

    prompt_template = "<|system|>\n<|end|>\n<|user|>\n{query}<|end|>\n<|assistant|>"
    prompt = prompt_template.format(query=query)
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.2,
        top_k=50,
        top_p=0.95,
        # presumably the tokenizer id of <|end|> for this checkpoint — TODO confirm
        eos_token_id=49155,
    )
    if outputs:
        return generated_output_filtering(outputs)
    return None
# Wire the generator into a minimal text-in/text-out Gradio UI and start the
# server (HF Spaces executes this module top-level, so launch() runs on import).
iface = gr.Interface(fn=get_result, inputs="text", outputs="text")
iface.launch()