Arnesh27 committed on
Commit 7b95784 · verified · 1 Parent(s): 9f8b574

Update app.py

Files changed (1)
  1. app.py +6 -4
app.py CHANGED
@@ -2,17 +2,19 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import torch
 
-# Load the model
-model_name = "HuggingFaceH4/starchat2-15b-v0.1"  # Your main model
+# Load a smaller model to reduce memory usage
+model_name = "distilgpt2"  # Smaller model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
 
 def generate_text(input_text):
     # Ensure input is in the correct format
     input_tensor = tokenizer(input_text, return_tensors="pt", clean_up_tokenization_spaces=True)
-    output = model.generate(**input_tensor)
+
+    # Generate text with a limit on max_length to reduce memory usage
+    output = model.generate(**input_tensor, max_length=50)  # Adjust max_length as needed
     response = tokenizer.decode(output[0], skip_special_tokens=True)
     return response
 
 iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", allow_flagging="never")
-iface.launch(server_name="0.0.0.0", server_port=7860)
+iface.launch(server_name="0.0.0.0", server_port=7860)
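Once the Space restarts with this change, the interface can be exercised programmatically. A minimal sketch, assuming the app from app.py is reachable at http://localhost:7860 and the gradio_client package is installed; the "/predict" endpoint name is Gradio's default for a single Interface, not something defined in this commit:

# Minimal sketch: call the running Gradio app from Python.
# Assumes the server started by app.py is reachable at http://localhost:7860.
from gradio_client import Client

client = Client("http://localhost:7860")

# The Interface has one text input and one text output, so predict()
# takes a single string; "/predict" is Gradio's default endpoint name.
result = client.predict("Once upon a time", api_name="/predict")
print(result)

With distilgpt2 and max_length=50, the response should come back within a few seconds even on CPU-only hardware, which is the point of this commit's change.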