Update app.py
app.py CHANGED

@@ -2,17 +2,19 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import torch
 
-# Load
-model_name = "
+# Load a smaller model to reduce memory usage
+model_name = "distilgpt2"  # Smaller model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
 
 def generate_text(input_text):
     # Ensure input is in the correct format
     input_tensor = tokenizer(input_text, return_tensors="pt", clean_up_tokenization_spaces=True)
-
+
+    # Generate text with a limit on max_length to reduce memory usage
+    output = model.generate(**input_tensor, max_length=50)  # Adjust max_length as needed
     response = tokenizer.decode(output[0], skip_special_tokens=True)
     return response
 
 iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", allow_flagging="never")
-iface.launch(server_name="0.0.0.0", server_port=7860)
+iface.launch(server_name="0.0.0.0", server_port=7860)
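A note on the two memory-reduction changes in this commit: max_length caps the total sequence length, prompt tokens included, so a long prompt leaves fewer tokens for the completion. To bound only the newly generated text, transformers' generate also accepts max_new_tokens; a minimal variant of the added line (an alternative sketch, not part of the commit):

    output = model.generate(**input_tensor, max_new_tokens=50)  # caps only the generated tokens, not the prompt

Separately, torch_dtype=torch.float16 halves the weights' memory footprint, but half precision is not supported by every CPU op; if this Space runs on CPU-only hardware and generation errors out, dropping the dtype argument to load in the default float32 is a common fallback (again an assumption about the hardware, not part of the commit):

    model = AutoModelForCausalLM.from_pretrained(model_name)  # default float32; safer on CPU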