jatingocodeo committed on
Commit 743bf1d · verified · 1 Parent(s): 448a8ab

Create app.py

Files changed (1)
  app.py +110 -0
app.py ADDED
@@ -0,0 +1,110 @@
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
def load_model(model_id):
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    return model, tokenizer

def generate_response(instruction, model, tokenizer, max_length=200, temperature=0.7, top_p=0.9):
    # Format the input in the instruction/response template used for fine-tuning
    input_text = f"### Instruction:\n{instruction}\n\n### Response:\n"

    # Tokenize the input and move it to the model's device
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate a response; do_sample=True is needed for temperature and top_p
    # to take effect, and max_length counts prompt tokens plus generated tokens
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode the full sequence (prompt + completion)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Return only the text after the "### Response:" marker
    response_parts = response.split("### Response:")
    if len(response_parts) > 1:
        return response_parts[1].strip()
    return response.strip()

def create_demo():
    # Use the uploaded model
    model_id = "jatingocodeo/phi2-finetuned-openassistant"

    # Load model and tokenizer once at startup
    model, tokenizer = load_model(model_id)

    # Wrap generate_response so Gradio can pass the UI values through
    def process_input(instruction, max_length, temperature, top_p):
        return generate_response(
            instruction,
            model,
            tokenizer,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p
        )

    # Create the interface
    demo = gr.Interface(
        fn=process_input,
        inputs=[
            gr.Textbox(
                label="Instruction",
                placeholder="Enter your instruction here...",
                lines=4
            ),
            gr.Slider(
                minimum=50,
                maximum=500,
                value=200,
                step=10,
                label="Maximum Length"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P"
            )
        ],
        outputs=gr.Textbox(label="Response", lines=8),
        title="Phi-2 Fine-tuned Assistant",
        description="""This is a fine-tuned version of the Microsoft Phi-2 model, trained on the OpenAssistant dataset.
        You can adjust the generation parameters:
        - **Maximum Length**: Controls the maximum length of the generated response
        - **Temperature**: Higher values make the output more random, lower values make it more focused
        - **Top P**: Controls the cumulative probability threshold for token sampling
        """,
        # Each example must supply one value per input component
        examples=[
            ["What is machine learning?", 200, 0.7, 0.9],
            ["Write a short poem about artificial intelligence", 200, 0.7, 0.9],
            ["Explain quantum computing to a 10-year-old", 200, 0.7, 0.9],
            ["What are the best practices for writing clean code?", 200, 0.7, 0.9]
        ]
    )
    return demo

if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
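
For a quick sanity check outside the Gradio UI, a minimal sketch along these lines can exercise the same load/generate path (it assumes app.py is importable from the working directory and that there is enough GPU memory for the float16 weights; the model id and prompt template are the ones used above):

# smoke_test.py - minimal check of the functions defined in app.py, no UI.
# Importing app is safe: launch() is behind the __main__ guard.
from app import load_model, generate_response

model, tokenizer = load_model("jatingocodeo/phi2-finetuned-openassistant")
print(generate_response("What is machine learning?", model, tokenizer, max_length=150))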