shukdevdatta123 committed on
Commit
f30f4dc
·
verified ·
1 Parent(s): 648ab5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -34
app.py CHANGED
@@ -3,6 +3,7 @@ from openai import OpenAI
3
  from PIL import Image
4
  import base64
5
  import io
 
6
 
7
  def pil_to_base64(image, max_size=1024):
8
  """
@@ -14,60 +15,109 @@ def pil_to_base64(image, max_size=1024):
14
  image.save(buffered, format="JPEG")
15
  return base64.b64encode(buffered.getvalue()).decode()
16
 
17
- def generate_story(image, genre, api_key):
18
  """
19
- Generate a travel story based on the provided image and genre using the OpenRouter API.
20
  """
21
- if not image:
22
- return "Please upload an image."
23
- if not genre:
24
- return "Please enter a genre."
25
- if not api_key:
26
- return "Please enter your OpenRouter API key."
27
 
 
 
 
 
 
 
28
  try:
29
- image_base64 = pil_to_base64(image)
30
- client = OpenAI(
31
- base_url="https://openrouter.ai/api/v1",
32
- api_key=api_key,
33
- )
34
- prompt = f"Generate a {genre} story based on this travel photo."
35
- messages = [
36
- {
37
- "role": "user",
38
- "content": [
39
- {"type": "text", "text": prompt},
40
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}
41
- ]
42
- }
43
- ]
44
- completion = client.chat.completions.create(
45
- model="meta-llama/llama-3.2-11b-vision-instruct:free",
46
- messages=messages
47
- )
48
  story = completion.choices[0].message.content
49
- return story
 
50
  except Exception as e:
51
- return f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Gradio Interface
54
  with gr.Blocks() as demo:
55
  gr.Markdown("# Interactive Travel Story Generator")
56
  gr.Markdown("Upload a travel photo, select a genre, and provide your OpenRouter API key to generate a personalized travel story.")
57
- gr.Markdown("Note: You need an OpenRouter API key to use this app. Get one at [OpenRouter](https://openrouter.ai/).")
 
58
 
59
  with gr.Row():
60
  image_input = gr.Image(type="pil", label="Upload Travel Photo")
61
- genre_input = gr.Textbox(label="Story Genre (e.g., adventure, romance, mystery, fantasy, historical)")
62
  api_key_input = gr.Textbox(label="OpenRouter API Key", type="password")
63
 
64
  generate_button = gr.Button("Generate Story")
65
- output_text = gr.Textbox(label="Generated Story", lines=10)
 
 
 
 
 
 
 
66
 
 
 
 
 
 
 
67
  generate_button.click(
68
- fn=generate_story,
69
  inputs=[image_input, genre_input, api_key_input],
70
- outputs=output_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  )
72
 
73
  demo.launch()
 
3
  from PIL import Image
4
  import base64
5
  import io
6
+ from gtts import gTTS
7
 
8
  def pil_to_base64(image, max_size=1024):
9
  """
 
15
  image.save(buffered, format="JPEG")
16
  return base64.b64encode(buffered.getvalue()).decode()
17
 
18
def generate_initial_story(image, genre, api_key):
    """
    Generate the initial travel story based on the image and genre.

    Args:
        image: Uploaded photo; handed to ``pil_to_base64`` for encoding.
        genre: Free-text genre that is inserted into the prompt.
        api_key: OpenRouter API key used to authenticate the request.

    Returns:
        A ``(story, messages)`` tuple. ``messages`` is the chat history (the
        user prompt with the embedded image followed by the assistant reply)
        in the shape ``generate_continuation`` expects. If an input is
        missing, or anything fails while preparing or sending the request,
        the first item is a user-facing message and the history is ``[]``.
    """
    if not image or not genre or not api_key:
        return "Please provide all inputs.", []

    try:
        # Encoding the image and building the client happen inside the try so
        # that a bad image or client setup failure is reported as an
        # "Error: ..." result instead of escaping as an unhandled exception.
        image_base64 = pil_to_base64(image)
        client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
        prompt = f"Generate a {genre} story based on this travel photo."
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                    },
                ],
            }
        ]
        completion = client.chat.completions.create(
            model="meta-llama/llama-3.2-11b-vision-instruct:free",
            messages=messages,
        )
        story = completion.choices[0].message.content
        # Keep the reply in the history so a later continuation has context.
        messages.append({"role": "assistant", "content": story})
        return story, messages
    except Exception as e:
        return f"Error: {str(e)}", []
38
+
39
def generate_continuation(continuation_prompt, messages, api_key):
    """
    Generate a continuation of the story based on the provided prompt.

    Args:
        continuation_prompt: User instruction describing how the story
            should continue.
        messages: Chat history returned by the initial story generation.
        api_key: OpenRouter API key used to authenticate the request.

    Returns:
        A ``(text, messages)`` tuple. On success ``text`` is every assistant
        reply so far joined by blank lines and ``messages`` is the extended
        history. If an input is missing or the request fails, ``text`` is a
        user-facing message and the history is returned unchanged.
    """
    # Report which input is actually missing instead of always blaming the
    # prompt; a whitespace-only prompt counts as missing.
    if not continuation_prompt or not continuation_prompt.strip():
        return "Please provide a continuation prompt.", messages
    if not messages:
        return "Please generate a story first.", messages
    if not api_key:
        return "Please enter your OpenRouter API key.", messages

    # Build a new list so the caller's history is untouched if the call fails.
    updated_messages = messages + [{"role": "user", "content": continuation_prompt}]
    try:
        # Client construction is inside the try so setup failures are also
        # reported as an "Error: ..." result.
        client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
        completion = client.chat.completions.create(
            model="meta-llama/llama-3.2-11b-vision-instruct:free",
            messages=updated_messages,
        )
        continuation = completion.choices[0].message.content
        updated_messages.append({"role": "assistant", "content": continuation})
        full_story = "\n\n".join(
            msg["content"] for msg in updated_messages if msg["role"] == "assistant"
        )
        return full_story, updated_messages
    except Exception as e:
        return f"Error: {str(e)}", messages
57
+
58
def generate_audio(story):
    """
    Generate an audio file from the story text using gTTS.

    Args:
        story: Text to convert to speech.

    Returns:
        Path of the generated MP3 file, or ``None`` when ``story`` is empty
        or contains only whitespace.
    """
    if not story or not story.strip():
        return None

    import os
    import tempfile

    tts = gTTS(text=story, lang='en')
    # Use a unique file per request: a fixed "story.mp3" would be overwritten
    # when two requests generate audio at the same time.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_file = tmp.name
    try:
        tts.save(audio_file)
    except Exception:
        # Do not leave an empty placeholder file behind when synthesis fails.
        try:
            os.unlink(audio_file)
        except OSError:
            pass
        raise
    return audio_file
68
 
69
  # Gradio Interface
70
# Layout: intro text, input row, story box, audio row, continuation controls.
# The chat history is kept in a gr.State value between button clicks.
with gr.Blocks() as demo:
    gr.Markdown("# Interactive Travel Story Generator")
    gr.Markdown("Upload a travel photo, select a genre, and provide your OpenRouter API key to generate a personalized travel story.")
    gr.Markdown("After generating a story, enter a prompt below to continue it, or click 'Generate Audio' to hear it!")
    gr.Markdown("Note: You need an OpenRouter API key from [OpenRouter](https://openrouter.ai/).")

    with gr.Row():
        image_input = gr.Image(label="Upload Travel Photo", type="pil")
        genre_input = gr.Textbox(label="Story Genre (e.g., adventure, romance, mystery)")
        api_key_input = gr.Textbox(type="password", label="OpenRouter API Key")

    generate_button = gr.Button("Generate Story")
    story_output = gr.Textbox(lines=10, label="Generated Story")

    with gr.Row():
        tts_button = gr.Button("Generate Audio")
        audio_output = gr.Audio(label="Story Audio")

    continuation_prompt = gr.Textbox(label="Continuation Prompt (e.g., 'Now, the hero finds a mysterious map.')")
    continue_button = gr.Button("Continue Story")

    # Conversation history shared by the generate and continue handlers.
    message_state = gr.State([])

    def on_generate_story(image, genre, api_key):
        # Forwards the (story, messages) pair to the story box and the state.
        return generate_initial_story(image, genre, api_key)

    generate_button.click(
        on_generate_story,
        [image_input, genre_input, api_key_input],
        [story_output, message_state],
    )

    def on_generate_continuation(prompt_text, history, api_key):
        # Forwards the (full_story, updated_messages) pair to the same outputs.
        return generate_continuation(prompt_text, history, api_key)

    continue_button.click(
        on_generate_continuation,
        [continuation_prompt, message_state, api_key_input],
        [story_output, message_state],
    )

    def on_generate_audio(story):
        # Passes the text currently shown in the story box to generate_audio.
        return generate_audio(story)

    tts_button.click(
        on_generate_audio,
        story_output,
        audio_output,
    )

demo.launch()