import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
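
# Gradio demo for a Phi-2 model fine-tuned on the OpenAssistant dataset.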


def load_model(model_id):
    """Load the fine-tuned model and its tokenizer."""
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,  # half precision to reduce memory footprint
        device_map="auto",          # place weights on GPU(s)/CPU automatically
        trust_remote_code=True,
    )
    return model, tokenizer


def generate_response(instruction, model, tokenizer, max_length=200, temperature=0.7, top_p=0.9):
    """Generate a response to an instruction using an Alpaca-style prompt."""
    input_text = f"### Instruction:\n{instruction}\n\n### Response:\n"

    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_length=max_length,  # total length in tokens, prompt included
        do_sample=True,         # sampling must be enabled for temperature/top_p to apply
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text includes the prompt; keep only what follows the response marker.
    response_parts = response.split("### Response:")
    if len(response_parts) > 1:
        return response_parts[1].strip()
    return response.strip()


def create_demo():
    """Build the Gradio interface around the fine-tuned model."""
    model_id = "jatingocodeo/phi2-finetuned-openassistant"
    model, tokenizer = load_model(model_id)

    def process_input(instruction, max_length, temperature, top_p):
        return generate_response(
            instruction,
            model,
            tokenizer,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
        )

    demo = gr.Interface(
        fn=process_input,
        inputs=[
            gr.Textbox(
                label="Instruction",
                placeholder="Enter your instruction here...",
                lines=4,
            ),
            gr.Slider(
                minimum=50,
                maximum=500,
                value=200,
                step=10,
                label="Maximum Length",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P",
            ),
        ],
        outputs=gr.Textbox(label="Response", lines=8),
        title="Phi-2 Fine-tuned Assistant",
        description="""This is a fine-tuned version of the Microsoft Phi-2 model, trained on the OpenAssistant dataset.

You can adjust the generation parameters:
- **Maximum Length**: Controls the maximum length of the generated response
- **Temperature**: Higher values make the output more random; lower values make it more focused
- **Top P**: Controls the cumulative probability threshold for nucleus sampling
""",
        # Each example row must supply a value for every input component.
        examples=[
            ["What is machine learning?", 200, 0.7, 0.9],
            ["Write a short poem about artificial intelligence", 200, 0.7, 0.9],
            ["Explain quantum computing to a 10-year-old", 200, 0.7, 0.9],
            ["What are the best practices for writing clean code?", 200, 0.7, 0.9],
        ],
    )
    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
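    # Use demo.launch(share=True) to expose a temporary public URL.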