File size: 1,061 Bytes
9d9968c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import asyncio

from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

class ImageToText:
    """Image-to-text captioning backed by a pretrained BLIP checkpoint.

    The processor and the model are loaded once, in the constructor, and are
    reused for every caption request.
    """

    # Single source of truth for the checkpoint used when none is given.
    DEFAULT_MODEL_NAME = "Salesforce/blip-image-captioning-large"

    def __init__(self, model_name: str = DEFAULT_MODEL_NAME):
        """Load the BLIP processor and captioning model.

        Args:
            model_name: Checkpoint identifier handed to ``from_pretrained``
                for both the processor and the model. Defaults to
                ``DEFAULT_MODEL_NAME``.
        """
        print("Loading Image-to-Text model...")
        self.processor = BlipProcessor.from_pretrained(model_name)
        self.model = BlipForConditionalGeneration.from_pretrained(model_name)
        print("Image-to-Text model loaded successfully.")

    def _caption_sync(self, image) -> str:
        """Run the blocking preprocess -> generate -> decode pipeline."""
        inputs = self.processor(image, return_tensors="pt")
        out = self.model.generate(**inputs)
        # Only the first generated sequence is decoded into the caption.
        return self.processor.decode(out[0], skip_special_tokens=True)

    async def generate_caption(self, image) -> str:
        """Generate a descriptive caption for an uploaded image.

        The captioning pipeline is synchronous, so it is dispatched to the
        event loop's default executor; awaiting this coroutine therefore does
        not stall other tasks on the loop while the caption is produced.

        Args:
            image (PIL.Image): The image to caption.

        Returns:
            str: The generated caption.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._caption_sync, image)