xzerus committed on
Commit
1258570
·
verified ·
1 Parent(s): c5e37aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -9,7 +9,7 @@ import logging
9
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
10
 
11
  # Device Configuration
12
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
 
14
  # ImageNet normalization values
15
  IMAGENET_MEAN = (0.485, 0.456, 0.406)
@@ -28,7 +28,7 @@ def build_transform(input_size):
28
  def preprocess_image(image, input_size=448):
29
  """Preprocess the image to the required format."""
30
  transform = build_transform(input_size)
31
- tensor_image = transform(image).unsqueeze(0).to(torch.float32 if device == "cpu" else torch.bfloat16).to(device)
32
  return tensor_image
33
 
34
  # Load the model and tokenizer
@@ -36,7 +36,7 @@ logging.info("Loading model from Hugging Face Hub...")
36
  model_path = "OpenGVLab/InternVL2_5-1B"
37
  model = AutoModel.from_pretrained(
38
  model_path,
39
- torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
40
  trust_remote_code=True,
41
  ).to(device).eval()
42
 
@@ -55,13 +55,14 @@ def describe_image(image):
55
  pixel_values = preprocess_image(image, input_size=448)
56
  prompt = "<image>\nExtract text from the image, respond with only the extracted text."
57
 
 
58
  response = model.chat(
59
  tokenizer=tokenizer,
60
  pixel_values=pixel_values,
61
  question=prompt,
62
  history=None,
63
  return_history=False,
64
- generation_config=dict(max_new_tokens=512, do_sample=True)
65
  )
66
  return response
67
  except Exception as e:
@@ -78,4 +79,4 @@ interface = gr.Interface(
78
  )
79
 
80
  if __name__ == "__main__":
81
- interface.launch(server_name="0.0.0.0", server_port=7860)
 
9
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
10
 
11
  # Device Configuration
12
+ device = torch.device("cpu") # Force CPU usage
13
 
14
  # ImageNet normalization values
15
  IMAGENET_MEAN = (0.485, 0.456, 0.406)
 
28
  def preprocess_image(image, input_size=448):
29
  """Preprocess the image to the required format."""
30
  transform = build_transform(input_size)
31
+ tensor_image = transform(image).unsqueeze(0).to(torch.float32).to(device) # Use float32 for CPU
32
  return tensor_image
33
 
34
  # Load the model and tokenizer
 
36
  model_path = "OpenGVLab/InternVL2_5-1B"
37
  model = AutoModel.from_pretrained(
38
  model_path,
39
+ torch_dtype=torch.float32, # Use float32 for CPU compatibility
40
  trust_remote_code=True,
41
  ).to(device).eval()
42
 
 
55
  pixel_values = preprocess_image(image, input_size=448)
56
  prompt = "<image>\nExtract text from the image, respond with only the extracted text."
57
 
58
+ # Perform inference
59
  response = model.chat(
60
  tokenizer=tokenizer,
61
  pixel_values=pixel_values,
62
  question=prompt,
63
  history=None,
64
  return_history=False,
65
+ generation_config=dict(max_new_tokens=512, do_sample=True),
66
  )
67
  return response
68
  except Exception as e:
 
79
  )
80
 
81
  if __name__ == "__main__":
82
+ interface.launch(server_name="0.0.0.0", server_port=7860, share=True)