hskwon7 committed on
Commit
9f8fd3c
·
verified ·
1 Parent(s): c22e261

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -55
app.py CHANGED
@@ -4,80 +4,54 @@ from PIL import Image
4
  import io
5
  from gtts import gTTS
6
 
7
- # Page config
8
  st.title("🖼️ → 📖 Image-to-Story Demo")
9
  st.write("Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!")
10
 
11
- # Load and cache pipelines
12
  @st.cache_resource
13
  def load_captioner():
14
  return pipeline("image-to-text", model="unography/blip-large-long-cap")
15
 
16
  @st.cache_resource
17
  def load_story_gen():
18
- return pipeline(
19
- "text-generation",
20
- model="gpt2",
21
- tokenizer="gpt2"
22
- )
23
 
24
  captioner = load_captioner()
25
  story_gen = load_story_gen()
26
 
27
- # 1) Image upload
28
- uploaded = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
29
  if uploaded:
30
  img = Image.open(uploaded)
31
  st.image(img, use_column_width=True)
32
 
33
- # 2) Generate caption
34
- with st.spinner("Generating caption"):
35
- caps = captioner(img)
36
- # `caps` is a list of dicts like [{"generated_text": "..."}]
37
- caption = caps[0]["generated_text"]
38
- st.write("**Caption:**", caption)
39
-
40
- # 3) Generate story from caption
41
- with st.spinner("Spinning up a story…"):
42
- story_out = story_gen(
43
- caption,
44
- max_length=200,
45
- num_return_sequences=1,
46
- do_sample=True,
47
- top_p=0.9
48
- )
49
- story = story_out[0]["generated_text"]
50
- st.write("**Story:**", story)
51
-
52
- # 4) Play story as audio
53
- if st.button("🔊 Play Story Audio"):
54
  with st.spinner("Generating audio…"):
55
- tts = gTTS(text=story, lang="en")
56
  buf = io.BytesIO()
57
  tts.write_to_fp(buf)
58
  buf.seek(0)
59
- st.audio(buf.read(), format="audio/mp3")
60
-
61
-
62
- """
63
- import streamlit as st
64
- from transformers import pipeline
65
-
66
- def main():
67
- sentiment_pipeline = pipeline(model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
68
-
69
- st.title("Sentiment Analysis with HuggingFace Spaces")
70
- st.write("Enter a sentence to analyze its sentiment:")
71
 
72
- user_input = st.text_input("")
73
- if user_input:
74
- result = sentiment_pipeline(user_input)
75
- sentiment = result[0]["label"]
76
- confidence = result[0]["score"]
77
-
78
- st.write(f"Sentiment: {sentiment}")
79
- st.write(f"Confidence: {confidence:.2f}")
80
-
81
- if __name__ == "__main__":
82
- main()
83
- """
 
4
  import io
5
  from gtts import gTTS
6
 
 
7
  st.title("🖼️ → 📖 Image-to-Story Demo")
8
  st.write("Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!")
9
 
 
10
  @st.cache_resource
11
  def load_captioner():
12
  return pipeline("image-to-text", model="unography/blip-large-long-cap")
13
 
14
  @st.cache_resource
15
  def load_story_gen():
16
+ return pipeline("text-generation", model="gpt2", tokenizer="gpt2")
 
 
 
 
17
 
18
  captioner = load_captioner()
19
  story_gen = load_story_gen()
20
 
21
+ # 1) Upload (key='image' gives us st.session_state.image)
22
+ uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
23
  if uploaded:
24
  img = Image.open(uploaded)
25
  st.image(img, use_column_width=True)
26
 
27
+ # 2) Caption (once per upload)
28
+ if "caption" not in st.session_state:
29
+ with st.spinner("Generating caption…"):
30
+ st.session_state.caption = captioner(img)[0]["generated_text"]
31
+ st.write("**Caption:**", st.session_state.caption)
32
+
33
+ # 3) Story (once per upload)
34
+ if "story" not in st.session_state:
35
+ with st.spinner("Spinning up a story…"):
36
+ out = story_gen(
37
+ st.session_state.caption,
38
+ max_length=200,
39
+ num_return_sequences=1,
40
+ do_sample=True,
41
+ top_p=0.9
42
+ )
43
+ st.session_state.story = out[0]["generated_text"]
44
+ st.write("**Story:**", st.session_state.story)
45
+
46
+ # 4) Pre-generate audio buffer (once per upload)
47
+ if "audio_buffer" not in st.session_state:
48
  with st.spinner("Generating audio…"):
49
+ tts = gTTS(text=st.session_state.story, lang="en")
50
  buf = io.BytesIO()
51
  tts.write_to_fp(buf)
52
  buf.seek(0)
53
+ st.session_state.audio_buffer = buf.read()
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ # 5) Play on demand
56
+ if st.button("🔊 Play Story Audio"):
57
+ st.audio(st.session_state.audio_buffer, format="audio/mp3")