PhantHive committed
Commit 679bcc5 · verified · 1 Parent(s): 058347f

Update app.py

Files changed (1)
  1. app.py +9 -15
app.py CHANGED
@@ -3,31 +3,25 @@ from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
 
-if torch.cuda.is_available():
-    device = torch.device("cuda")
-    print("GPU is available!")
-else:
-    device = torch.device("cpu")
-    print("GPU is not available, using CPU.")
+# Device configuration (prioritize the GPU if available)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Load the model and config when the script starts
 peft_model_id = "phearion/bigbrain-v0.0.1"
 config = PeftConfig.from_pretrained(peft_model_id)
 model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 
 # Load the Lora model
-model = PeftModel.from_pretrained(model, peft_model_id)
+model = PeftModel.from_pretrained(model, peft_model_id).to(device)
 
 def greet(text):
-    batch = tokenizer("“aide moi avec les equa diff ” ->: ", return_tensors='pt')
-
-    with torch.cuda.amp.autocast():
-        output_tokens = model.generate(**batch, max_new_tokens=15)
-
+    with torch.no_grad():  # Disable gradient tracking for inference
+        batch = tokenizer(text, return_tensors='pt').to(device)  # Move input tensors to the device
+        with torch.cuda.amp.autocast():  # Use mixed precision when running on CUDA
+            output_tokens = model.generate(**batch, max_new_tokens=15)
     return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
 
-
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+iface = gr.Interface(fn=greet, inputs="text", outputs="text", title="PEFT Model for Big Brain", live=True)
+iface.launch(share=True)  # Create a public share link for the demo
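
With this change, `greet(text)` finally uses its argument; the old version ignored `text` and always tokenized the hard-coded French prompt ("help me with the differential equations"). A minimal smoke test, not part of the commit, that could be pasted just above the `iface.launch(share=True)` line to confirm the new inference path end to end:

# Illustrative smoke test: run the updated greet() once at startup and
# print a short completion for the prompt that used to be hard-coded.
print(greet("aide moi avec les equa diff"))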