Update app.py
app.py CHANGED
@@ -9,7 +9,7 @@ import logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
 # Device Configuration
-device = torch.device("
+device = torch.device("cpu")  # Force CPU usage
 
 # ImageNet normalization values
 IMAGENET_MEAN = (0.485, 0.456, 0.406)
@@ -28,7 +28,7 @@ def build_transform(input_size):
 def preprocess_image(image, input_size=448):
     """Preprocess the image to the required format."""
     transform = build_transform(input_size)
-    tensor_image = transform(image).unsqueeze(0).to(torch.float32
+    tensor_image = transform(image).unsqueeze(0).to(torch.float32).to(device)  # Use float32 for CPU
     return tensor_image
 
 # Load the model and tokenizer
@@ -36,7 +36,7 @@ logging.info("Loading model from Hugging Face Hub...")
 model_path = "OpenGVLab/InternVL2_5-1B"
 model = AutoModel.from_pretrained(
     model_path,
-    torch_dtype=torch.
+    torch_dtype=torch.float32,  # Use float32 for CPU compatibility
     trust_remote_code=True,
 ).to(device).eval()
 
@@ -55,13 +55,14 @@ def describe_image(image):
         pixel_values = preprocess_image(image, input_size=448)
         prompt = "<image>\nExtract text from the image, respond with only the extracted text."
 
+        # Perform inference
         response = model.chat(
             tokenizer=tokenizer,
             pixel_values=pixel_values,
             question=prompt,
             history=None,
             return_history=False,
-            generation_config=dict(max_new_tokens=512, do_sample=True)
+            generation_config=dict(max_new_tokens=512, do_sample=True),
         )
         return response
     except Exception as e:
@@ -78,4 +79,4 @@ interface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    interface.launch(server_name="0.0.0.0", server_port=7860)
+    interface.launch(server_name="0.0.0.0", server_port=7860, share=True)
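For context, here is a minimal sketch of what the updated app.py plausibly looks like end to end after this commit. The device configuration, preprocessing, model loading, model.chat call, and launch line are taken from the diff above; the imports, the body of build_transform, the IMAGENET_STD values, the AutoTokenizer loading, the except handler body, and the gr.Interface arguments are not shown in the diff and are filled in here as assumptions to make the sketch runnable.

import logging

import gradio as gr
import torch
import torchvision.transforms as T
from torchvision.transforms import InterpolationMode
from transformers import AutoModel, AutoTokenizer

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Device Configuration
device = torch.device("cpu")  # Force CPU usage

# ImageNet normalization values (std values are assumed; only the mean appears in the diff)
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def build_transform(input_size):
    """Assumed body: resize to a square and normalize with ImageNet statistics."""
    return T.Compose([
        T.Lambda(lambda img: img.convert("RGB")),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])

def preprocess_image(image, input_size=448):
    """Preprocess the image to the required format."""
    transform = build_transform(input_size)
    # float32 keeps the tensor CPU-compatible
    tensor_image = transform(image).unsqueeze(0).to(torch.float32).to(device)
    return tensor_image

# Load the model and tokenizer
logging.info("Loading model from Hugging Face Hub...")
model_path = "OpenGVLab/InternVL2_5-1B"
model = AutoModel.from_pretrained(
    model_path,
    torch_dtype=torch.float32,  # Use float32 for CPU compatibility
    trust_remote_code=True,
).to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

def describe_image(image):
    try:
        pixel_values = preprocess_image(image, input_size=448)
        prompt = "<image>\nExtract text from the image, respond with only the extracted text."

        # Perform inference
        response = model.chat(
            tokenizer=tokenizer,
            pixel_values=pixel_values,
            question=prompt,
            history=None,
            return_history=False,
            generation_config=dict(max_new_tokens=512, do_sample=True),
        )
        return response
    except Exception as e:  # assumed handler; the diff shows only the except line
        logging.error("Inference failed: %s", e)
        return f"Error: {e}"

# gr.Interface arguments are assumed; the diff shows only the closing parenthesis
interface = gr.Interface(fn=describe_image, inputs=gr.Image(type="pil"), outputs="text")

if __name__ == "__main__":
    interface.launch(server_name="0.0.0.0", server_port=7860, share=True)

Note on the launch change: share=True asks Gradio to open a public tunnel in addition to binding 0.0.0.0:7860. Inside a Hugging Face Space the Space itself already exposes port 7860 (and Gradio typically ignores the flag there with a warning), so the flag mainly matters when running the file locally.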