Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -47,18 +47,22 @@ def analyze_input(image, question):
|
|
47 |
pixel_values = preprocess_image(image)
|
48 |
|
49 |
# Tokenize the question
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
52 |
# Construct the model_inputs dictionary
|
53 |
model_inputs = {
|
54 |
"input_ids": input_ids,
|
55 |
-
"pixel_values": pixel_values
|
|
|
56 |
}
|
57 |
|
58 |
# Generate the response using the model
|
59 |
outputs = model.generate(
|
60 |
model_inputs=model_inputs,
|
61 |
-
max_new_tokens=256
|
62 |
)
|
63 |
|
64 |
# Decode the response
|
|
|
47 |
pixel_values = preprocess_image(image)
|
48 |
|
49 |
# Tokenize the question
|
50 |
+
tokenized = tokenizer(question, return_tensors="pt")
|
51 |
+
input_ids = tokenized.input_ids.to(model.device)
|
52 |
+
|
53 |
+
# Calculate target size
|
54 |
+
tgt_size = input_ids.size(1) + 256 # Original input size + max new tokens
|
55 |
+
|
56 |
# Construct the model_inputs dictionary
|
57 |
model_inputs = {
|
58 |
"input_ids": input_ids,
|
59 |
+
"pixel_values": pixel_values,
|
60 |
+
"tgt_sizes": [tgt_size] # Add target sizes for generation
|
61 |
}
|
62 |
|
63 |
# Generate the response using the model
|
64 |
outputs = model.generate(
|
65 |
model_inputs=model_inputs,
|
|
|
66 |
)
|
67 |
|
68 |
# Decode the response
|