# Import the requirements
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering

# Load the model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")


# Function to answer the question
def answer_question(image, text):
    """
    Generates an answer to a given question based on the provided image and text.

    Args:
        image (str or PIL.Image.Image): The path to the image file or an
            already-loaded PIL image.
        text (str): The question text.

    Returns:
        str: The generated answer to the question.
    """

    # Load the image from disk if a file path was given
    if isinstance(image, str):
        image = Image.open(image).convert("RGB")

    # Preprocess the image-question pair and generate the answer token IDs
    inputs = processor(images=image, text=text, return_tensors="pt")
    generated_ids = model.generate(**inputs, max_length=50)

    # Decode the generated IDs
    generated_answer = processor.batch_decode(generated_ids, skip_special_tokens=True)

    # Return the generated answer
    return generated_answer[0]
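

# A minimal usage sketch. The file name "example.jpg" and the question below
# are illustrative assumptions, not part of the original module.
if __name__ == "__main__":
    sample_answer = answer_question("example.jpg", "What is in the picture?")
    print(sample_answer)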