File size: 1,254 Bytes
873d734
2acd2cc
 
873d734
2acd2cc
4b21ac1
2acd2cc
c2930d6
2acd2cc
 
c2930d6
2acd2cc
 
c2930d6
2acd2cc
 
 
c2930d6
2acd2cc
 
873d734
2acd2cc
 
 
 
 
873d734
2acd2cc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from peft import PeftModel, LoraConfig

# Hub repository ids: the base checkpoint, and the adapter that is applied on
# top of it (the adapter lives in its own repository, not in the base one).
model_path = "yentinglin/Llama-3-Taiwan-8B-Instruct"
adapter_path = "netmouse/Llama-3-Taiwan-8B-Instruct-finetuning-by-promisedchat"

# Tokenizer and model config both come from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)

# Plain load of the base model: no 4-bit/8-bit quantization options are passed.
# ignore_mismatched_sizes=True is forwarded as-is to from_pretrained.
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    config=config,
    ignore_mismatched_sizes=True,
)

# Wrap the base model with the fine-tuned adapter weights.
model = PeftModel.from_pretrained(base_model, adapter_path)

def generate_text(input_text):
    """Generate text from a prompt using the module-level tokenizer and model.

    Args:
        input_text: Prompt string supplied by the UI.

    Returns:
        The decoded first sequence returned by ``model.generate`` with special
        tokens stripped, or ``""`` when the prompt is empty, whitespace-only,
        or ``None``.
    """
    # Nothing to condition on: skip the (expensive) model call entirely.
    if not input_text or not input_text.strip():
        return ""

    # Calling the tokenizer directly (instead of .encode) also returns the
    # attention mask, which is passed to generate() explicitly below.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)

    # max_new_tokens bounds only the generated continuation. The previous
    # max_length=50 counted the prompt too, so prompts of 50+ tokens left no
    # room to generate anything.
    outputs = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=50,
        num_return_sequences=1,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Single text box in, single text box out, backed by generate_text; the server
# is started as soon as the interface has been built.
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
)
iface.launch()