import gradio as gr
from transformers import pipeline
import torch
import accelerate      # needed for device_map="auto"
import bitsandbytes    # needed for 4-bit (load_in_4bit) loading

from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "OS07/Letsee"
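# Resolve the adapter config to find its base model, then load that base model
# in 4-bit with the weights automatically placed across available devices.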
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_4bit=True, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA adapter weights on top of the quantized base model
model = PeftModel.from_pretrained(model, peft_model_id)

def generated_output_filtering(output):
  # Strip the prompt/chat-template prefix and return only the assistant's reply.
  if len(output) > 0:
    text = output[0].get("generated_text", "")
    marker = "<|assistant|>"
    if marker in text:
      return text[text.find(marker) + len(marker):]
  return None

# Build the generation pipeline once at startup, reusing the PEFT-wrapped model
# and tokenizer loaded above instead of re-downloading the checkpoint on every call.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def get_result(query):
  # Wrap the user query in the chat template expected by the fine-tuned model.
  prompt_template = "<|system|>\n<|end|>\n<|user|>\n{query}<|end|>\n<|assistant|>"
  prompt = prompt_template.format(query=query)
  outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.2, top_k=50, top_p=0.95, eos_token_id=49155)
  if outputs:
    return generated_output_filtering(outputs)
  return None


iface = gr.Interface(fn=get_result, inputs="text", outputs="text")
iface.launch()