Sergidev committed on
Commit
4e84396
·
verified ·
1 Parent(s): 5612076

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -69
app.py CHANGED
@@ -6,9 +6,14 @@ import gradio as gr
6
  import spaces
7
  import torch
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
9
-
10
  DESCRIPTION = """\
11
- # Backend for Sergidev
 
 
 
 
 
 
12
  """
13
 
14
  MAX_MAX_NEW_TOKENS = 2048
@@ -26,31 +31,19 @@ model = AutoModelForCausalLM.from_pretrained(
26
  )
27
  model.eval()
28
 
29
-
30
  @spaces.GPU(duration=90)
31
  def generate(
32
  message: str,
33
- chat_history: list[tuple[str, str]],
34
  max_new_tokens: int = 1024,
35
  temperature: float = 0.6,
36
  top_p: float = 0.9,
37
  top_k: int = 50,
38
  repetition_penalty: float = 1.2,
39
  ) -> Iterator[str]:
40
- conversation = []
41
- for user, assistant in chat_history:
42
- conversation.extend(
43
- [
44
- {"role": "user", "content": user},
45
- {"role": "assistant", "content": assistant},
46
- ]
47
- )
48
- conversation.append({"role": "user", "content": message})
49
-
50
- input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
51
  if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
52
  input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
53
- gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
54
  input_ids = input_ids.to(model.device)
55
 
56
  streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
@@ -71,63 +64,86 @@ def generate(
71
  outputs = []
72
  for text in streamer:
73
  outputs.append(text)
74
- yield "".join(outputs)
75
-
76
-
77
- chat_interface = gr.ChatInterface(
78
- fn=generate,
79
- additional_inputs=[
80
- gr.Slider(
81
- label="Max new tokens",
82
- minimum=1,
83
- maximum=MAX_MAX_NEW_TOKENS,
84
- step=1,
85
- value=DEFAULT_MAX_NEW_TOKENS,
86
- ),
87
- gr.Slider(
88
- label="Temperature",
89
- minimum=0.1,
90
- maximum=4.0,
91
- step=0.1,
92
- value=0.6,
93
- ),
94
- gr.Slider(
95
- label="Top-p (nucleus sampling)",
96
- minimum=0.05,
97
- maximum=1.0,
98
- step=0.05,
99
- value=0.9,
100
- ),
101
- gr.Slider(
102
- label="Top-k",
103
- minimum=1,
104
- maximum=1000,
105
- step=1,
106
- value=50,
107
- ),
108
- gr.Slider(
109
- label="Repetition penalty",
110
- minimum=1.0,
111
- maximum=2.0,
112
- step=0.05,
113
- value=1.2,
114
- ),
115
- ],
116
- stop_btn=None,
117
- examples=[
118
- ["Hello there! How are you doing?"],
119
- ["Can you explain briefly to me what is the Python programming language?"],
120
- ["Explain the plot of Cinderella in a sentence."],
121
- ["How many hours does it take a man to eat a Helicopter?"],
122
- ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
123
- ],
124
- cache_examples=False,
125
- )
126
 
127
  with gr.Blocks(css="style.css", fill_height=True) as demo:
128
  gr.Markdown(DESCRIPTION)
129
  gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
130
- chat_interface.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  if __name__ == "__main__":
133
  demo.queue(max_size=20).launch()
 
6
  import spaces
7
  import torch
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
9
  DESCRIPTION = """\
10
+ # Qwen 0.5B Text Completion
11
+
12
+ This is a demo of [`Qwen/Qwen2-0.5B-Instruct`](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct), a lightweight language model fine-tuned for instruction following.
13
+
14
+ This space allows you to input text and have the AI complete it. Simply type your text in the input box, click "Complete", and watch as the AI generates a continuation of your text.
15
+
16
+ You can adjust various parameters such as temperature and top-p sampling to control the generation process.
17
  """
18
 
19
  MAX_MAX_NEW_TOKENS = 2048
 
31
  )
32
  model.eval()
33
 
 
34
  @spaces.GPU(duration=90)
35
  def generate(
36
  message: str,
 
37
  max_new_tokens: int = 1024,
38
  temperature: float = 0.6,
39
  top_p: float = 0.9,
40
  top_k: int = 50,
41
  repetition_penalty: float = 1.2,
42
  ) -> Iterator[str]:
43
+ input_ids = tokenizer.encode(message, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
44
  if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
45
  input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
46
+ gr.Warning(f"Trimmed input as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
47
  input_ids = input_ids.to(model.device)
48
 
49
  streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
 
64
  outputs = []
65
  for text in streamer:
66
  outputs.append(text)
67
+ yield message + "".join(outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  with gr.Blocks(css="style.css", fill_height=True) as demo:
70
  gr.Markdown(DESCRIPTION)
71
  gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
72
+
73
+ with gr.Row():
74
+ with gr.Column(scale=4):
75
+ input_box = gr.Textbox(
76
+ label="Enter your text",
77
+ placeholder="Type your message here...",
78
+ lines=5
79
+ )
80
+ output_box = gr.Textbox(
81
+ label="Completed text",
82
+ lines=10,
83
+ interactive=False
84
+ )
85
+ with gr.Column(scale=1):
86
+ max_new_tokens = gr.Slider(
87
+ label="Max new tokens",
88
+ minimum=1,
89
+ maximum=MAX_MAX_NEW_TOKENS,
90
+ step=1,
91
+ value=DEFAULT_MAX_NEW_TOKENS,
92
+ )
93
+ temperature = gr.Slider(
94
+ label="Temperature",
95
+ minimum=0.1,
96
+ maximum=4.0,
97
+ step=0.1,
98
+ value=0.6,
99
+ )
100
+ top_p = gr.Slider(
101
+ label="Top-p (nucleus sampling)",
102
+ minimum=0.05,
103
+ maximum=1.0,
104
+ step=0.05,
105
+ value=0.9,
106
+ )
107
+ top_k = gr.Slider(
108
+ label="Top-k",
109
+ minimum=1,
110
+ maximum=1000,
111
+ step=1,
112
+ value=50,
113
+ )
114
+ repetition_penalty = gr.Slider(
115
+ label="Repetition penalty",
116
+ minimum=1.0,
117
+ maximum=2.0,
118
+ step=0.05,
119
+ value=1.2,
120
+ )
121
+
122
+ complete_btn = gr.Button("Complete")
123
+
124
+ complete_btn.click(
125
+ fn=generate,
126
+ inputs=[
127
+ input_box,
128
+ max_new_tokens,
129
+ temperature,
130
+ top_p,
131
+ top_k,
132
+ repetition_penalty
133
+ ],
134
+ outputs=output_box
135
+ )
136
+
137
+ gr.Examples(
138
+ examples=[
139
+ "Hello there! How are you doing?",
140
+ "Can you explain briefly to me what is the Python programming language?",
141
+ "Explain the plot of Cinderella in a sentence.",
142
+ "How many hours does it take a man to eat a Helicopter?",
143
+ "Write a 100-word article on 'Benefits of Open-Source in AI research'",
144
+ ],
145
+ inputs=input_box
146
+ )
147
 
148
  if __name__ == "__main__":
149
  demo.queue(max_size=20).launch()