import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# Note: `accelerate` and `bitsandbytes` must be installed for
# device_map="auto" and 4-bit quantized loading to work.

peft_model_id = "OS07/Letsee"
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model in 4-bit, along with its tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA adapter weights on top of the base model.
model = PeftModel.from_pretrained(model, peft_model_id)

# Build the generation pipeline once at startup, reusing the model loaded
# above instead of downloading it again on every request.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def generated_output_filtering(output):
    """Return the text generated after the <|assistant|> marker, or None."""
    if not output:
        return None
    text = output[0]["generated_text"]
    marker = "<|assistant|>"
    if marker in text:
        return text[text.find(marker) + len(marker):]
    return None


def get_result(query):
    # StarChat-style chat template: empty system message, then the user query.
    prompt_template = "<|system|>\n<|end|>\n<|user|>\n{query}<|end|>\n<|assistant|>"
    prompt = prompt_template.format(query=query)
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.2,
        top_k=50,
        top_p=0.95,
        eos_token_id=49155,  # <|end|> token id, so generation stops at end of turn
    )
    return generated_output_filtering(outputs)


iface = gr.Interface(fn=get_result, inputs="text", outputs="text")
iface.launch()