Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,19 +10,30 @@ def generate_response(message, temperature, max_length):
|
|
10 |
# Tokenize input
|
11 |
inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=512)
|
12 |
|
13 |
-
#
|
|
|
|
|
|
|
14 |
with torch.no_grad():
|
15 |
-
|
16 |
inputs["input_ids"],
|
17 |
max_length=max_length,
|
18 |
temperature=temperature,
|
19 |
do_sample=True,
|
20 |
pad_token_id=tokenizer.eos_token_id,
|
21 |
num_return_sequences=1,
|
|
|
|
|
22 |
)
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# Create the Gradio interface
|
28 |
demo = gr.Interface(
|
@@ -46,7 +57,7 @@ demo = gr.Interface(
|
|
46 |
- Start with a clear and detailed prompt
|
47 |
- Adjust temperature: Higher for creative writing, lower for factual completion
|
48 |
- Adjust max length based on how much text you want to generate
|
49 |
-
"""
|
50 |
)
|
51 |
|
52 |
if __name__ == "__main__":
|
|
|
10 |
# Tokenize input
|
11 |
inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=512)
|
12 |
|
13 |
+
# Initialize the generated text with the input message
|
14 |
+
generated_text = message
|
15 |
+
|
16 |
+
# Generate response token by token
|
17 |
with torch.no_grad():
|
18 |
+
generated_ids = model.generate(
|
19 |
inputs["input_ids"],
|
20 |
max_length=max_length,
|
21 |
temperature=temperature,
|
22 |
do_sample=True,
|
23 |
pad_token_id=tokenizer.eos_token_id,
|
24 |
num_return_sequences=1,
|
25 |
+
return_dict_in_generate=True,
|
26 |
+
output_scores=True,
|
27 |
)
|
28 |
+
|
29 |
+
# Get the generated token ids (excluding the input prompt)
|
30 |
+
new_tokens = generated_ids.sequences[0][inputs["input_ids"].shape[1]:]
|
31 |
+
|
32 |
+
# Decode and yield tokens one by one
|
33 |
+
for i in range(len(new_tokens)):
|
34 |
+
next_token = tokenizer.decode(new_tokens[:i+1], skip_special_tokens=True)
|
35 |
+
generated_text += next_token
|
36 |
+
yield generated_text
|
37 |
|
38 |
# Create the Gradio interface
|
39 |
demo = gr.Interface(
|
|
|
57 |
- Start with a clear and detailed prompt
|
58 |
- Adjust temperature: Higher for creative writing, lower for factual completion
|
59 |
- Adjust max length based on how much text you want to generate
|
60 |
+
""",
|
61 |
)
|
62 |
|
63 |
if __name__ == "__main__":
|