jcrissa committed
Commit 3c2f453 · 1 Parent(s): 907eba6

edit app.py

Files changed (1)
  1. app.py +48 -37
app.py CHANGED
@@ -7,56 +7,67 @@ from transformers import AutoTokenizer
 # Load your fine-tuned Phi-3 model from Hugging Face
 MODEL_NAME = "jcrissa/phi3-new-t2i"
 
+# Check if CUDA is available, otherwise fall back to CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
-device = "cuda"
-
-@spaces.GPU
+# Function to load the Phi-3 model and tokenizer
+@spaces.GPU  # Reintroduced spaces.GPU decorator for GPU setup
 def load_phi3_model():
-    # Load the Phi-3 model and tokenizer from Hugging Face
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        MODEL_NAME,
-        max_seq_length=4096,  # Ensure it matches your fine-tuning
-        dtype=torch.float16  # Use `torch.float16` if running on GPU
-    )
-
-    model.to(device)
-
-    # Configure tokenizer settings
-    tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.padding_side = "left"
-
-    return model, tokenizer
+    try:
+        # Load the Phi-3 model and tokenizer from Hugging Face
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            MODEL_NAME,
+            max_seq_length=4096,  # Ensure it matches your fine-tuning
+            dtype=torch.float16 if device == "cuda" else torch.float32  # Use `float16` for GPU, `float32` for CPU
+        )
+        model.to(device)
+
+        # Configure tokenizer settings
+        tokenizer.pad_token = tokenizer.eos_token
+        tokenizer.padding_side = "left"
+
+        return model, tokenizer
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        return None, None
 
+# Load the model and tokenizer, ensure error handling
 phi3_model, phi3_tokenizer = load_phi3_model()
 
+if phi3_model is None or phi3_tokenizer is None:
+    raise RuntimeError("Model and tokenizer could not be loaded. Please check the Hugging Face model path or network connection.")
+
 # Function to generate text using Phi-3
 def generate(plain_text):
-    # Tokenize input text and move to the device
-    input_ids = phi3_tokenizer(plain_text.strip(), return_tensors="pt").input_ids.to(device)
-    eos_id = phi3_tokenizer.eos_token_id
-
-    # Generate the output from the model
-    outputs = phi3_model.generate(
-        input_ids,
-        do_sample=True,
-        max_new_tokens=75,
-        num_beams=8,
-        num_return_sequences=1,
-        eos_token_id=eos_id,
-        pad_token_id=eos_id,
-        length_penalty=-1.0
-    )
-
-    # Decode and return the generated text
-    output_text = phi3_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return output_text.strip()
+    try:
+        # Tokenize input text and move to the device
+        input_ids = phi3_tokenizer(plain_text.strip(), return_tensors="pt").input_ids.to(device)
+        eos_id = phi3_tokenizer.eos_token_id
+
+        # Generate the output from the model
+        outputs = phi3_model.generate(
+            input_ids,
+            do_sample=True,
+            max_new_tokens=75,
+            num_beams=8,
+            num_return_sequences=1,
+            eos_token_id=eos_id,
+            pad_token_id=eos_id,
+            length_penalty=-1.0
+        )
+
+        # Decode and return the generated text
+        output_text = phi3_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return output_text.strip()
+    except Exception as e:
+        return f"Error during text generation: {e}"
 
 # Setup Gradio Interface
 txt = grad.Textbox(lines=1, label="Input Text", placeholder="Enter your prompt")
 out = grad.Textbox(lines=1, label="Generated Text")
 
 # Launch Gradio Interface with ZeroGPU-compatible setup
-grad.Interface(
+grad.Interface(  # use the same `grad` alias as the Textbox components above
     fn=generate,
     inputs=txt,
     outputs=out,
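
The hunk starts at line 7, so the file's import header is not shown; the hunk context names `from transformers import AutoTokenizer`, and the diff body uses `spaces`, `torch`, the `grad` Gradio alias, and `FastLanguageModel`. A minimal sketch of what lines 1–6 presumably look like, plus a closing for the truncated `Interface(...)` call, follows; both are inferences from the diff, not part of the committed hunk:

# Presumed import header (lines 1-6 sit outside this hunk); inferred from
# the names the diff uses, not taken from the committed text:
import spaces                            # ZeroGPU decorator used as @spaces.GPU
import torch                             # device check and dtype selection
import gradio as grad                    # matches the `grad` alias in the diff
from unsloth import FastLanguageModel    # assumption: Unsloth's loader, returns (model, tokenizer)
from transformers import AutoTokenizer   # shown in the hunk header

# The diff is cut off after `outputs=out,`; a ZeroGPU Space would
# presumably close and launch the Interface roughly like this:
grad.Interface(
    fn=generate,
    inputs=txt,
    outputs=out,
).launch()

Note that `FastLanguageModel.from_pretrained` returns a `(model, tokenizer)` pair, which is consistent with the commit unpacking both values and never calling `AutoTokenizer` directly.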