Spaces:

apratim24
/

Image_to_Story_Generator

Runtime error

App Files Community

apratim24 committed on May 30, 2024

Commit

1b094c5

verified ·

1 Parent(s): f964c3b

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -11

app.py CHANGED Viewed

@@ -1,15 +1,89 @@
 import gradio as gr
-# from langchain.llms import OpenAI
-from langchain_openai import OpenAI
-from transformers import pipeline
-from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
 import os
 openai_api_key = os.getenv("OPENAI_API_KEY")
 # Load text generation model
-# text_generation_model = pipeline("text-generation", model="openai-community/gpt2-large")
-# text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
 # Load image captioning model
 encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
@@ -20,7 +94,6 @@ feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
 tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
 model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
 def generate_story(image, theme, genre, word_count):
     try:
         # Preprocess the image
@@ -36,15 +109,15 @@ def generate_story(image, theme, genre, word_count):
         # Generate story based on the caption
         story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
-        llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
-        story = llm.invoke(story_prompt)
-        # story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
         return caption_text, story
     except Exception as e:
         return f"An error occurred during inference: {str(e)}"
 # Gradio interface
 input_image = gr.Image(label="Select Image",type="pil")
@@ -64,4 +137,4 @@ gr.Interface(
     examples = examples,
     title="Image to Story Generator",
     description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
-).launch()

 import gradio as gr
+# Using openai models ---------------------------------------------------------
+from langchain_openai import OpenAI
 import os
 openai_api_key = os.getenv("OPENAI_API_KEY")
+import io
+import base64
+import requests
+def generate_story(image, theme, genre, word_count):
+    try:
+        width = 1000
+        # Function to resize image maintaining aspect ratio with a maximum width of 1000 pixels
+        def resize_image(image, max_width=width):
+            with Image.open(image) as img:
+                ratio = max_width / img.width
+                new_height = int(img.height * ratio)
+                resized_img = img.resize((max_width, new_height), Image.ANTIALIAS)
+                img_byte_arr = io.BytesIO()
+                resized_img.save(img_byte_arr, format=img.format)
+                return img_byte_arr.getvalue()
+        # Function to encode the image to base64
+        def encode_image(image):
+            resized_image_bytes = resize_image(image)  # Resize the image
+            return base64.b64encode(resized_image_bytes).decode('utf-8')
+        # Function to call the API for image and get the response
+        def get_response_for_image(openai_api_key, image):
+            base64_image = encode_image(image)
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {openai_api_key}"
+            }
+            payload = {
+                "model": "gpt-4o",
+                "messages": [
+                  {
+                    "role": "user",
+                    "content": [
+                      {
+                        "type": "text",
+                        "text": '''Describe or caption the image within 20 words. Output in json format with key: Description'''
+                      },
+                      {
+                        "type": "image_url",
+                        "image_url": {
+                          "url": f"data:image/jpeg;base64,{base64_image}",
+                          "detail": "low"
+                        }
+                      }
+                    ]
+                  }
+                ],
+                "max_tokens": 500
+            }
+            response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
+            return response['choices'][0]['message']['content']
+        # Decode the caption
+        caption_text = get_response_for_image(openai_api_key, image)
+        # Generate story based on the caption
+        story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
+        llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
+        story = llm.invoke(story_prompt)
+        return caption_text, story
+    except Exception as e:
+        return f"An error occurred during inference: {str(e)}"
+# Using open source models ----------------------------------------------------
+'''
+from transformers import pipeline, AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
 # Load text generation model
+text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
 # Load image captioning model
 encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
 model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
 def generate_story(image, theme, genre, word_count):
     try:
         # Preprocess the image
         # Generate story based on the caption
         story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
+        story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
         return caption_text, story
     except Exception as e:
         return f"An error occurred during inference: {str(e)}"
+'''
+# -------------------------------------------------------------------------
 # Gradio interface
 input_image = gr.Image(label="Select Image",type="pil")
     examples = examples,
     title="Image to Story Generator",
     description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
+).launch()