import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load model and tokenizer
def load_model(model_id):
    # Load the tokenizer from the fine-tuned repo and the Phi-2 base weights
    base_model_id = "microsoft/phi-2"
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    
    # Attach the LoRA adapter to the base model (the weights are not merged here)
    model = PeftModel.from_pretrained(base_model, model_id)
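    # Optional sketch: the adapter could be folded into the base weights for
    # slightly faster inference via PEFT's merge_and_unload(), at the cost of
    # keeping a full merged copy of the model in memory:
    #     model = model.merge_and_unload()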
    return model, tokenizer

def generate_response(instruction, model, tokenizer, max_length=200, temperature=0.7, top_p=0.9):
    # Wrap the instruction in the prompt template the model expects
    input_text = f"### Instruction:\n{instruction}\n\n### Response:\n"
    
    # Tokenize input
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    
    # Generate the response; do_sample=True is required for temperature and
    # top_p to take effect (otherwise decoding is greedy and the sliders are ignored)
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id
    )
    
    # Decode and return the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract only the response part
    response_parts = response.split("### Response:")
    if len(response_parts) > 1:
        return response_parts[1].strip()
    return response.strip()

def create_demo(model_id):
    # Load model and tokenizer
    model, tokenizer = load_model(model_id)
    
    # Define the interface
    def process_input(instruction, max_length, temperature, top_p):
        return generate_response(
            instruction,
            model,
            tokenizer,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p
        )
    
    # Create the interface
    demo = gr.Interface(
        fn=process_input,
        inputs=[
            gr.Textbox(
                label="Instruction",
                placeholder="Enter your instruction here...",
                lines=4
            ),
            gr.Slider(
                minimum=50,
                maximum=500,
                value=200,
                step=10,
                label="Maximum Length"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P"
            )
        ],
        outputs=gr.Textbox(label="Response", lines=8),
        title="Phi-2 Fine-tuned Assistant",
        description="""This is a fine-tuned version of the Microsoft Phi-2 model, trained on the OpenAssistant dataset.
        You can adjust the generation parameters:
        - **Maximum Length**: Controls the maximum length of the generated response
        - **Temperature**: Higher values make the output more random; lower values make it more focused
        - **Top P**: Controls the cumulative probability threshold for token sampling
        """,
        # Each example row must supply a value for all four input components
        examples=[
            ["What is machine learning?", 200, 0.7, 0.9],
            ["Write a short poem about artificial intelligence", 200, 0.7, 0.9],
            ["Explain quantum computing to a 10-year-old", 200, 0.7, 0.9],
            ["What are the best practices for writing clean code?", 200, 0.7, 0.9]
        ]
    )
    return demo

if __name__ == "__main__":
    # Replace with your model ID (username/model-name)
    model_id = "jatingocodeo/phi2-finetuned-openassistant"
    demo = create_demo(model_id)
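    # launch() serves the app locally (http://127.0.0.1:7860 by default);
    # passing share=True would additionally create a temporary public URL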
    demo.launch()