apratim24 committed on
Commit
1b094c5
·
verified ·
1 Parent(s): f964c3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -11
app.py CHANGED
@@ -1,15 +1,89 @@
1
  import gradio as gr
2
- # from langchain.llms import OpenAI
3
- from langchain_openai import OpenAI
4
- from transformers import pipeline
5
- from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
6
 
 
 
 
7
  import os
8
  openai_api_key = os.getenv("OPENAI_API_KEY")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Load text generation model
11
- # text_generation_model = pipeline("text-generation", model="openai-community/gpt2-large")
12
- # text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
13
 
14
  # Load image captioning model
15
  encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
@@ -20,7 +94,6 @@ feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
20
  tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
21
  model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
22
 
23
-
24
  def generate_story(image, theme, genre, word_count):
25
  try:
26
  # Preprocess the image
@@ -36,15 +109,15 @@ def generate_story(image, theme, genre, word_count):
36
  # Generate story based on the caption
37
  story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
38
 
39
- llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
40
- story = llm.invoke(story_prompt)
41
- # story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
42
 
43
  return caption_text, story
44
  except Exception as e:
45
  return f"An error occurred during inference: {str(e)}"
 
46
 
47
 
 
48
 
49
  # Gradio interface
50
  input_image = gr.Image(label="Select Image",type="pil")
@@ -64,4 +137,4 @@ gr.Interface(
64
  examples = examples,
65
  title="Image to Story Generator",
66
  description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
67
- ).launch()
 
1
  import gradio as gr
 
 
 
 
2
 
3
+ # Using openai models ---------------------------------------------------------
4
+
5
+ from langchain_openai import OpenAI
6
  import os
7
  openai_api_key = os.getenv("OPENAI_API_KEY")
8
 
9
+ import io
10
+ import base64
11
+ import requests
12
+
13
def _encode_image(image, max_width=1000):
    """Return *image* as base64 text of a JPEG at most *max_width* pixels wide.

    Args:
        image: An in-memory PIL image (the Gradio input is declared with
            ``type="pil"``), so it is used directly rather than re-opened.
        max_width: Upper bound on the encoded image width, in pixels.

    Returns:
        The JPEG bytes of the (possibly downscaled) image, base64-encoded
        and decoded to a ``str``.
    """
    # Only shrink: 1000 px is a maximum, so narrow images are left untouched.
    if image.width > max_width:
        ratio = max_width / image.width
        new_height = max(1, int(image.height * ratio))
        # Default resampling is used; Image.ANTIALIAS no longer exists in
        # Pillow >= 10.
        image = image.resize((max_width, new_height))

    buffer = io.BytesIO()
    # In-memory images have no ``format``; always emit JPEG so the bytes match
    # the ``data:image/jpeg`` URL sent to the API. JPEG cannot store alpha or
    # palette modes, hence the RGB conversion.
    image.convert("RGB").save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue()).decode('utf-8')


def _caption_image(api_key, image):
    """Ask the OpenAI chat-completions endpoint for a short caption of *image*.

    Args:
        api_key: OpenAI API key, sent as a bearer token.
        image: PIL image to describe.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    base64_image = _encode_image(image)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": '''Describe or caption the image within 20 words. Output in json format with key: Description''',
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 500,
    }
    # A timeout keeps a stalled connection from hanging the UI forever.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=60,
    )
    # Surface API errors (bad key, quota, ...) instead of a confusing KeyError.
    response.raise_for_status()
    # A Response object is not subscriptable; decode the JSON body first.
    return response.json()['choices'][0]['message']['content']


def generate_story(image, theme, genre, word_count):
    """Caption *image* with an OpenAI vision model, then write a story about it.

    Args:
        image: PIL image supplied by the Gradio image input.
        theme: Theme interpolated into the story prompt.
        genre: Genre interpolated into the story prompt.
        word_count: Word limit interpolated into the story prompt.

    Returns:
        A ``(caption_text, story)`` tuple. If anything fails, the first
        element is an error message and the second is an empty string, so
        the result has the same two-value shape as the success path.
    """
    try:
        if image is None:
            raise ValueError("no image was provided")

        # Describe the image
        caption_text = _caption_image(openai_api_key, image)

        # Generate story based on the caption
        story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."

        llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
        story = llm.invoke(story_prompt)

        return caption_text, story
    except Exception as e:
        # UI boundary: report the failure in the first output and leave the
        # second one empty rather than returning a single value.
        return f"An error occurred during inference: {str(e)}", ""
78
+
79
+
80
+ # Using open source models ----------------------------------------------------
81
+
82
+ '''
83
+ from transformers import pipeline, AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
84
+
85
  # Load text generation model
86
+ text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
 
87
 
88
  # Load image captioning model
89
  encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 
94
  tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
95
  model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
96
 
 
97
  def generate_story(image, theme, genre, word_count):
98
  try:
99
  # Preprocess the image
 
109
  # Generate story based on the caption
110
  story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
111
 
112
+ story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
 
 
113
 
114
  return caption_text, story
115
  except Exception as e:
116
  return f"An error occurred during inference: {str(e)}"
117
+ '''
118
 
119
 
120
+ # -------------------------------------------------------------------------
121
 
122
  # Gradio interface
123
  input_image = gr.Image(label="Select Image",type="pil")
 
137
  examples = examples,
138
  title="Image to Story Generator",
139
  description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
140
+ ).launch()