Spaces:

apratim24
/

Image_to_Story_Generator

Runtime error

App Files Files Community

apratim24 committed on May 30, 2024

Commit

058805f

verified ·

1 Parent(s): 4be22a3

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -15

app.py CHANGED Viewed

@@ -1,26 +1,91 @@
 import gradio as gr
-from langchain_openai import OpenAI
-from transformers import pipeline
-from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
 import os
 openai_api_key = os.getenv("OPENAI_API_KEY")
-# Load text generation model
-# text_generation_model = pipeline("text-generation", model="openai-community/gpt2-large")
-# text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
 # Load image captioning model
 encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 decoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
 tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
 model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
 def generate_story(image, theme, genre, word_count):
     try:
         # Preprocess the image
@@ -35,16 +100,15 @@ def generate_story(image, theme, genre, word_count):
         # Generate story based on the caption
         story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
-        llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
-        story = llm.invoke(story_prompt)
-        # story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
         return caption_text, story
     except Exception as e:
         return f"An error occurred during inference: {str(e)}"
 # Gradio interface
 input_image = gr.Image(label="Select Image",type="pil")
@@ -64,4 +128,4 @@ gr.Interface(
     examples = examples,
     title="Image to Story Generator",
     description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
-).launch()

 import gradio as gr
+# Using openai models ---------------------------------------------------------
+from langchain_openai import OpenAI
 import os
 openai_api_key = os.getenv("OPENAI_API_KEY")
+import io
+import base64
+import requests
+import json
+width = 800
+# Function to call the API for image and get the response
+def get_response_for_image(openai_api_key, image):
+    """Request a short caption for an image from the OpenAI chat completions API.
+
+    Args:
+        openai_api_key: API key sent as the Bearer token.
+        image: Bytes-like image data. It is base64-encoded and embedded in a
+            data URL that is declared as image/jpeg.
+
+    Returns:
+        The response body decoded from JSON. The HTTP status is not checked
+        here, so an API error payload is returned as-is for the caller to
+        handle.
+
+    Raises:
+        requests.exceptions.RequestException: If the connection fails or the
+            request times out.
+    """
+    base64_image = base64.b64encode(image).decode('utf-8')
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {openai_api_key}"
+    }
+    payload = {
+        "model": "gpt-4o",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": '''Describe or caption the image within 20 words. Output in json format with key: Description'''
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}",
+                            "detail": "low"
+                        }
+                    }
+                ]
+            }
+        ],
+        "max_tokens": 200
+    }
+    # requests waits forever by default; bound the wait so a stalled
+    # connection cannot hang the request handler indefinitely.
+    response = requests.post(
+        "https://api.openai.com/v1/chat/completions",
+        headers=headers,
+        json=payload,
+        timeout=60,
+    )
+    return response.json()
+def generate_story(image, theme, genre, word_count):
+    """Caption an image, then generate a story about that caption.
+
+    The image is serialized to JPEG bytes and sent to
+    get_response_for_image; the "Description" field of the JSON the model
+    replies with becomes the caption, which is inserted into the story prompt.
+
+    Args:
+        image: PIL image to caption.
+        theme: Theme text interpolated into the story prompt.
+        genre: Genre text interpolated into the story prompt.
+        word_count: Word limit interpolated into the story prompt.
+
+    Returns:
+        A (caption_text, story) pair. If anything fails, the pair is
+        (error message, "") so both values are always present.
+    """
+    try:
+        # JPEG cannot store alpha or palette data, so saving e.g. an RGBA PNG
+        # would raise; convert such images to RGB first.
+        if image.mode not in ("RGB", "L"):
+            image = image.convert("RGB")
+        # Convert PIL image to bytes-like format
+        with io.BytesIO() as output:
+            image.save(output, format="JPEG")
+            image_bytes = output.getvalue()
+        # Decode the caption
+        caption_response = get_response_for_image(openai_api_key, image_bytes)
+        json_str = caption_response['choices'][0]['message']['content']
+        # The reply may be wrapped in a Markdown code fence; strip it before parsing.
+        json_str = json_str.replace('```json', '').replace('```', '').strip()
+        content_json = json.loads(json_str)
+        caption_text = content_json['Description']
+        # Generate story based on the caption
+        story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
+        llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
+        story = llm.invoke(story_prompt)
+        return caption_text, story
+    except Exception as e:
+        # Match the two-value shape of the success path so a handler wired
+        # to two outputs still receives both values on failure.
+        return f"An error occurred during inference: {str(e)}", ""
+# Using open source models ----------------------------------------------------
+'''
+from transformers import pipeline, AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
+# Load text generation model
+text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
 # Load image captioning model
 encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 decoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
 tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
 model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
 def generate_story(image, theme, genre, word_count):
     try:
         # Preprocess the image
         # Generate story based on the caption
         story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
+        story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
         return caption_text, story
     except Exception as e:
         return f"An error occurred during inference: {str(e)}"
+'''
+# -------------------------------------------------------------------------
 # Gradio interface
 input_image = gr.Image(label="Select Image",type="pil")
     examples = examples,
     title="Image to Story Generator",
     description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
+).launch()