aTrapDeer committed on
Commit
88ccf2c
·
1 Parent(s): f391a5c

Upload 3 files

Browse files
Files changed (3) hide show
  1. main.py +86 -0
  2. packages.txt +6 -0
  3. requirements.txt +10 -0
main.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydub import AudioSegment
2
+ from dotenv import find_dotenv, load_dotenv
3
+ from transformers import pipeline
4
+ from elevenlabs import generate, play, voices
5
+ from elevenlabs import set_api_key
6
+ from elevenlabslib import ElevenLabsUser
7
+ import streamlit as st
8
+ import uuid
9
+
10
+ from langchain import LLMChain, PromptTemplate, OpenAI
11
+ import os
12
+
13
+ # Load environment variables
14
+ load_dotenv(find_dotenv())
15
+
16
def img2text(url):
    """Caption an image with the BLIP image-captioning model.

    Args:
        url: Image reference handed straight to the Hugging Face
            ``image-to-text`` pipeline (``main`` passes a local file name).

    Returns:
        The ``generated_text`` field of the first result the pipeline
        returns. The caption is also printed to stdout.
    """
    # NOTE(review): the pipeline is constructed on every call; consider
    # caching it if repeated model loading becomes a bottleneck.
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
    )
    first_result = captioner(url)[0]
    caption = first_result["generated_text"]
    print(caption)
    return caption
21
+
22
def generated_story(scenario):
    """Turn an image caption into a short story via an OpenAI LLM chain.

    Args:
        scenario: Text substituted into the prompt's ``{scenario}`` slot.

    Returns:
        The text predicted by the chain. It is also printed to stdout.
    """
    template = """
    You are the famous comedian and podcaster Tim Dillon. You will generate a short story in the style of his podcast using a funny narrative with explicit language and an angry rant-like tone, the story should be no more than 100 words;
    Context: {scenario}
    Story:
    """
    story_prompt = PromptTemplate(
        template=template,
        input_variables=["scenario"],
    )
    # temperature=1 is passed through to the OpenAI wrapper unchanged.
    llm = OpenAI(model_name="gpt-3.5-turbo", temperature=1)
    chain = LLMChain(llm=llm, prompt=story_prompt, verbose=True)

    story_text = chain.predict(scenario=scenario)
    print(story_text)
    return story_text
33
+
34
+
35
def textToSpeech(story):
    """Synthesize ``story`` with an ElevenLabs voice and save it as an MP3.

    Args:
        story: Text to convert to speech.

    Returns:
        Name of the file written (relative to the current working
        directory), of the form ``story_<uuid4>.mp3``.

    Raises:
        RuntimeError: If ``ELEVENLABS_API_KEY`` is not set in the environment.
    """
    # Use a distinct local name: binding ``set_api_key`` here would shadow
    # the ``elevenlabs.set_api_key`` function imported at the top of the file.
    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "ELEVENLABS_API_KEY is not set; cannot generate audio."
        )

    # Create the client BEFORE listing voices. Previously ``user`` was read
    # before it was assigned, so the listing always failed with
    # UnboundLocalError, which the broad ``except`` then swallowed.
    user = ElevenLabsUser(api_key)

    # Diagnostics only: a failure to list voices must not abort generation.
    try:
        available_voices = user.get_available_voices()
        print("Available Voices:", available_voices)
    except Exception as e:
        print("Error fetching available voices:", e)

    voice = user.get_voice_by_ID("cgOzEASJmlEWHtXnZJ5q")

    # Generate the audio data.
    result = voice.generate_audio_v2(story)

    # Assuming the audio data is the first element of the returned tuple
    # -- TODO confirm against the installed elevenlabslib version.
    audio_data = result[0]

    # A random UUID keeps successive runs from overwriting each other.
    name = f"story_{uuid.uuid4()}.mp3"

    # Save the audio data to a file in the project folder.
    with open(name, 'wb') as f:
        f.write(audio_data)
    return name
62
+
63
def main():
    """Run the Streamlit page: upload an image, caption it, narrate a story.

    When a file is uploaded, it is written to disk, captioned with
    ``img2text``, turned into a story with ``generated_story`` and voiced
    with ``textToSpeech``. The caption, story and audio are then rendered
    on the page. Nothing beyond the header and uploader is rendered until
    a file is provided.
    """
    st.set_page_config(page_title="Tim Dillon Image To Story", page_icon="📖", layout="wide")
    st.header("Tim Dillon Image To Story")
    uploaded_file = st.file_uploader("Upload an image...", type="jpg")
    if uploaded_file is None:
        return

    print(uploaded_file)
    bytes_data = uploaded_file.getvalue()

    # The file name is supplied by the client, so strip any directory
    # components before using it as a write path. Fall back to a generated
    # name if nothing usable remains.
    local_name = os.path.basename(uploaded_file.name) or f"upload_{uuid.uuid4()}.jpg"
    with open(local_name, 'wb') as f:
        f.write(bytes_data)

    st.image(bytes_data, caption='Uploaded Image.', use_column_width=True)

    # Pipeline: image -> caption -> story -> audio file.
    scenario = img2text(local_name)
    story = generated_story(scenario)
    generated_file_name = textToSpeech(story)

    with st.expander("scenario"):
        st.write(scenario)
    with st.expander("story"):
        st.write(story)

    st.audio(generated_file_name)
83
+
84
+
85
+ if __name__ == "__main__":
86
+ main()
packages.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ build-essential
2
+ libasound-dev
3
+ portaudio19-dev
4
+ python3-pyaudio
5
+ libportaudio2
6
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ config==0.5.1
2
+ elevenlabs==0.2.24
3
+ elevenlabslib==0.11.1
4
+ langchain==0.0.274
5
+ pydub==0.25.1
6
+ python-dotenv==1.0.0
7
+ Requests==2.31.0
8
+ streamlit==1.24.0
9
+ transformers==4.32.0
10
+ torch>=1.8