arjunanand13 committed on
Commit
e54e292
·
verified ·
1 Parent(s): 3071ce3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -5,28 +5,36 @@ from transformers import AutoModelForCausalLM, AutoProcessor
5
  import torch
6
  import gradio as gr
7
 
8
- # Load the model and processor
9
  model_name = "arjunanand13/Florence-enphase2"
10
 
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
- print(device)
13
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)
14
 
 
15
  processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
 
16
  torch.cuda.empty_cache()
17
 
 
 
 
 
18
 
19
  def predict(image, question):
20
 
21
- encoding = processor(image, question, return_tensors="pt")
 
 
 
 
22
 
23
  with torch.no_grad():
24
  outputs = model.generate(**encoding, max_length=256)
 
25
  answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
26
  return answer
27
 
28
  def gradio_interface(image, question):
29
-
30
  if image.mode != "RGB":
31
  image = image.convert("RGB")
32
 
@@ -36,13 +44,13 @@ def gradio_interface(image, question):
36
  iface = gr.Interface(
37
  fn=gradio_interface,
38
  inputs=[
39
- gr.Image(type="pil", label="Upload Image"),
40
- gr.Textbox(label="Enter your question")
41
  ],
42
  outputs=gr.Textbox(label="Answer"),
43
- title="Florence-enphase Leg lift classifier",
44
- description="Upload an image and ask a question about it."
 
45
  )
46
-
47
 
48
- iface.launch()
 
5
  import torch
6
  import gradio as gr
7
 
 
8
  model_name = "arjunanand13/Florence-enphase2"
9
 
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
+ print(f"Using device: {device}")
 
12
 
13
+ model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)
14
  processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
15
+
16
  torch.cuda.empty_cache()
17
 
18
+ DEFAULT_PROMPT = ("You are a Leg Lift Classifier. There is an image of a throughput component "
19
+ "and we need to identify if the leg is inserted in the hole or not. Return 'True' "
20
+ "if any leg is not completely seated in the hole; return 'False' if the leg is inserted "
21
+ "in the hole. Return only the required JSON in this format: {Leg_lift: , Reason: }.")
22
 
23
  def predict(image, question):
24
 
25
+ if not isinstance(image, Image.Image):
26
+ raise ValueError(f"Expected image to be PIL.Image, but got {type(image)}")
27
+
28
+
29
+ encoding = processor(images=image, text=question, return_tensors="pt").to(device)
30
 
31
  with torch.no_grad():
32
  outputs = model.generate(**encoding, max_length=256)
33
+
34
  answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
35
  return answer
36
 
37
  def gradio_interface(image, question):
 
38
  if image.mode != "RGB":
39
  image = image.convert("RGB")
40
 
 
44
  iface = gr.Interface(
45
  fn=gradio_interface,
46
  inputs=[
47
+ gr.Image(type="pil", label="Upload Image"), # Ensures image is passed as a PIL object
48
+ gr.Textbox(label="Enter your question or edit the default prompt", lines=6, value=DEFAULT_PROMPT) # Default prompt pre-filled and editable
49
  ],
50
  outputs=gr.Textbox(label="Answer"),
51
+ title="Florence-enphase Leg Lift Classifier",
52
+ description=("Upload an image and ask a question about the leg lift. The model will classify whether "
53
+ "the leg is inserted in the hole or not based on the image. You can edit the default prompt if needed.")
54
  )
 
55
 
56
+ iface.launch(debug=True)