CR7CAD committed on
Commit
e77741a
·
verified ·
1 Parent(s): fbad1e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -45
app.py CHANGED
@@ -1,32 +1,31 @@
1
- # import part - only using the two requested imports
2
  import streamlit as st
3
  from transformers import pipeline
4
  from PIL import Image
5
- import io
6
 
7
  # function part
8
- # img2text
9
  def img2text(image):
10
- image_to_text = pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")
11
- text = image_to_text(image)[0]["generated_text"]
 
12
  return text
13
 
14
- # text2story - IMPROVED to end naturally
15
  def text2story(text):
16
- # Using a smaller text generation model
17
- generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
18
 
19
- # Create a prompt for the story generation
20
- prompt = f"Write a fun children's story based on this: {text}. The story should be short and end naturally with a conclusion. Once upon a time, "
21
 
22
- # Generate the story
23
  story_result = generator(
24
  prompt,
25
- max_length=250, # Increased to allow for a complete story
26
  num_return_sequences=1,
27
  temperature=0.7,
28
  top_k=50,
29
- top_p=0.95,
30
  do_sample=True
31
  )
32
 
@@ -34,33 +33,25 @@ def text2story(text):
34
  story_text = story_result[0]['generated_text']
35
  story_text = story_text.replace(prompt, "Once upon a time, ")
36
 
37
- # Find a natural ending point (end of sentence) before 100 words
38
- words = story_text.split()
39
- if len(words) > 100:
40
- # Join the first 100 words
41
- shortened_text = " ".join(words[:100])
42
-
43
- # Find the last complete sentence
44
- last_period = shortened_text.rfind('.')
45
- last_question = shortened_text.rfind('?')
46
- last_exclamation = shortened_text.rfind('!')
47
-
48
- # Find the last sentence ending punctuation
49
- last_end = max(last_period, last_question, last_exclamation)
50
-
51
- if last_end > 0:
52
- # Truncate at the end of the last complete sentence
53
- story_text = shortened_text[:last_end + 1]
54
- else:
55
- # If no sentence ending found, just use the shortened text
56
- story_text = shortened_text
57
 
58
  return story_text
59
 
60
  # text2audio - Using HelpingAI-TTS-v1 model
61
  def text2audio(story_text):
62
  try:
63
- synthesizer = pipeline("text-to-speech", model="umarigan/speecht5_tts_tr_v1.0")
 
64
 
65
  # Limit text length to avoid timeouts
66
  max_chars = 500
@@ -74,9 +65,6 @@ def text2audio(story_text):
74
  # Generate speech
75
  speech = synthesizer(story_text)
76
 
77
- # Get output information
78
- st.write(f"Speech output keys: {list(speech.keys())}")
79
-
80
  return speech
81
 
82
  except Exception as e:
@@ -95,19 +83,25 @@ if uploaded_file is not None:
95
  # Convert the file to a PIL Image
96
  image = Image.open(uploaded_file)
97
 
 
 
 
98
  # Stage 1: Image to Text
99
- st.text('Processing img2text...')
100
- caption = img2text(image) # Pass PIL image to pipeline
101
- st.write(caption)
 
102
 
103
  # Stage 2: Text to Story
104
- st.text('Generating a story...')
105
- story = text2story(caption)
106
- st.write(story)
 
107
 
108
  # Stage 3: Story to Audio data
109
- st.text('Generating audio data...')
110
- speech_output = text2audio(story)
 
111
 
112
  # Play button
113
  if st.button("Play Audio"):
 
1
+ # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
  from PIL import Image
 
5
 
6
  # function part
7
+ # img2text - Using a lighter model
8
  def img2text(image):
9
+ # Use a smaller, faster image captioning model
10
+ image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
11
+ text = image_to_text(image, max_new_tokens=20)[0]["generated_text"]
12
  return text
13
 
14
+ # text2story - Using a much faster model with constraints
15
  def text2story(text):
16
+ # Use a tiny model that's much faster
17
+ generator = pipeline("text-generation", model="distilgpt2")
18
 
19
+ # Create a more constrained prompt for faster generation
20
+ prompt = f"A short children's story about {text}: Once upon a time, "
21
 
22
+ # Generate with strict constraints for speed
23
  story_result = generator(
24
  prompt,
25
+ max_new_tokens=100, # Limit token generation
26
  num_return_sequences=1,
27
  temperature=0.7,
28
  top_k=50,
 
29
  do_sample=True
30
  )
31
 
 
33
  story_text = story_result[0]['generated_text']
34
  story_text = story_text.replace(prompt, "Once upon a time, ")
35
 
36
+ # Find a natural ending point (end of sentence)
37
+ last_period = story_text.rfind('.')
38
+ last_question = story_text.rfind('?')
39
+ last_exclamation = story_text.rfind('!')
40
+
41
+ # Find the last sentence ending punctuation
42
+ last_end = max(last_period, last_question, last_exclamation)
43
+
44
+ if last_end > 0:
45
+ # Truncate at the end of the last complete sentence
46
+ story_text = story_text[:last_end + 1]
 
 
 
 
 
 
 
 
 
47
 
48
  return story_text
49
 
50
  # text2audio - Using HelpingAI-TTS-v1 model
51
  def text2audio(story_text):
52
  try:
53
+ # Use the HelpingAI TTS model as requested
54
+ synthesizer = pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")
55
 
56
  # Limit text length to avoid timeouts
57
  max_chars = 500
 
65
  # Generate speech
66
  speech = synthesizer(story_text)
67
 
 
 
 
68
  return speech
69
 
70
  except Exception as e:
 
83
  # Convert the file to a PIL Image
84
  image = Image.open(uploaded_file)
85
 
86
+ # Progress indicator
87
+ progress_bar = st.progress(0)
88
+
89
  # Stage 1: Image to Text
90
+ with st.spinner('Processing image caption...'):
91
+ caption = img2text(image)
92
+ progress_bar.progress(33)
93
+ st.write(f"**Image caption:** {caption}")
94
 
95
  # Stage 2: Text to Story
96
+ with st.spinner('Creating story...'):
97
+ story = text2story(caption)
98
+ progress_bar.progress(66)
99
+ st.write(f"**Story:** {story}")
100
 
101
  # Stage 3: Story to Audio data
102
+ with st.spinner('Generating audio...'):
103
+ speech_output = text2audio(story)
104
+ progress_bar.progress(100)
105
 
106
  # Play button
107
  if st.button("Play Audio"):