apratim24 committed on
Commit
058805f
·
verified ·
1 Parent(s): 4be22a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -15
app.py CHANGED
@@ -1,26 +1,91 @@
1
-
2
  import gradio as gr
3
- from langchain_openai import OpenAI
4
- from transformers import pipeline
5
- from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
6
 
 
 
 
7
  import os
8
  openai_api_key = os.getenv("OPENAI_API_KEY")
9
 
10
- # Load text generation model
11
- # text_generation_model = pipeline("text-generation", model="openai-community/gpt2-large")
12
- # text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
 
 
 
 
14
  # Load image captioning model
15
  encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
16
  decoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
17
  model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
18
-
19
  feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
20
  tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
21
  model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
22
-
23
-
24
  def generate_story(image, theme, genre, word_count):
25
  try:
26
  # Preprocess the image
@@ -35,16 +100,15 @@ def generate_story(image, theme, genre, word_count):
35
 
36
  # Generate story based on the caption
37
  story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
38
-
39
- llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
40
- story = llm.invoke(story_prompt)
41
- # story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
42
 
43
  return caption_text, story
44
  except Exception as e:
45
  return f"An error occurred during inference: {str(e)}"
 
46
 
47
 
 
48
 
49
  # Gradio interface
50
  input_image = gr.Image(label="Select Image",type="pil")
@@ -64,4 +128,4 @@ gr.Interface(
64
  examples = examples,
65
  title="Image to Story Generator",
66
  description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
67
- ).launch()
 
 
1
  import gradio as gr
 
 
 
2
 
3
+ # Using openai models ---------------------------------------------------------
4
+
5
+ from langchain_openai import OpenAI
6
  import os
7
  openai_api_key = os.getenv("OPENAI_API_KEY")
8
 
9
+ import io
10
+ import base64
11
+ import requests
12
+ import json
13
+
14
+ width = 800
15
+
16
+
17
+ # Function to call the API for image and get the response
18
def get_response_for_image(openai_api_key, image, timeout=60):
    """Ask the OpenAI chat completions endpoint for a short image caption.

    Args:
        openai_api_key: API key sent as the ``Bearer`` token.
        image: Raw image bytes. They are base64-encoded and embedded in the
            request as a ``data:image/jpeg`` URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        requests.exceptions.Timeout: If the request exceeds ``timeout``.
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx
            status (bad key, rate limit, ...).
    """
    base64_image = base64.b64encode(image).decode("utf-8")
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe or caption the image within 20 words. Output in json format with key: Description",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 200,
    }
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would freeze the UI request that triggered this call.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Surface HTTP failures here with a meaningful message instead of letting
    # the caller trip over a missing 'choices' key in the error body.
    response.raise_for_status()
    return response.json()
48
+
49
+
50
def _image_to_jpeg_bytes(image):
    """Serialize a PIL image to JPEG bytes, converting its mode if needed."""
    # JPEG cannot store alpha or palette data, so RGBA/P images (typical for
    # PNG uploads) would make ``save`` raise unless converted first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    with io.BytesIO() as output:
        image.save(output, format="JPEG")
        return output.getvalue()


def generate_story(image, theme, genre, word_count):
    """Caption an image with the OpenAI API and write a story about it.

    Args:
        image: PIL image to describe.
        theme: Theme inserted into the story prompt.
        genre: Genre inserted into the story prompt.
        word_count: Word limit inserted into the story prompt.

    Returns:
        A ``(caption_text, story)`` tuple. On failure the first element holds
        the error message and the second is an empty string, so the result
        always has two values.
    """
    try:
        image_bytes = _image_to_jpeg_bytes(image)

        # The model is asked for JSON with a "Description" key; it may wrap
        # the JSON in a Markdown code fence, which is stripped before parsing.
        caption_response = get_response_for_image(openai_api_key, image_bytes)
        json_str = caption_response['choices'][0]['message']['content']
        json_str = json_str.replace('```json', '').replace('```', '').strip()
        content_json = json.loads(json_str)
        caption_text = content_json['Description']

        # Generate story based on the caption
        story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."

        llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
        story = llm.invoke(story_prompt)

        return caption_text, story
    except Exception as e:
        # Keep the same arity as the success path: the interface presumably
        # declares two outputs (caption, story) -- confirm against the
        # gr.Interface definition. A lone string would not fill both.
        return f"An error occurred during inference: {str(e)}", ""
74
+
75
+
76
+ # Using open source models ----------------------------------------------------
77
 
78
+ '''
79
+ from transformers import pipeline, AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
80
+ # Load text generation model
81
+ text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
82
  # Load image captioning model
83
  encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
84
  decoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
85
  model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 
86
  feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
87
  tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
88
  model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
 
 
89
  def generate_story(image, theme, genre, word_count):
90
  try:
91
  # Preprocess the image
 
100
 
101
  # Generate story based on the caption
102
  story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
103
+ story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
 
 
 
104
 
105
  return caption_text, story
106
  except Exception as e:
107
  return f"An error occurred during inference: {str(e)}"
108
+ '''
109
 
110
 
111
+ # -------------------------------------------------------------------------
112
 
113
  # Gradio interface
114
  input_image = gr.Image(label="Select Image",type="pil")
 
128
  examples = examples,
129
  title="Image to Story Generator",
130
  description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
131
+ ).launch()