iohanngrig committed on
Commit 23e5d9e · verified · 1 Parent(s): a179be5

Update app.py

Files changed (1)
  1. app.py +138 -28
app.py CHANGED
@@ -1,28 +1,138 @@
- import streamlit as st
- import easyocr as ocr
- import numpy as np
- from PIL import Image
-
- st.title("Image to Text Toy Application")
-
- image = st.file_uploader(label = "Upload your image here", type=['png','jpg','jpeg'])
-
- @st.cache_data
- def load_model():
-     reader = ocr.Reader(['en'],model_storage_directory='Salesforce/blip-image-captioning-large')
-     return reader
-
- reader = load_model()
-
- if image is not None:
-     input_image = Image.open(image)
-     st.image(input_image)
-     with st.spinner("AI is processing your request."):
-         result = reader.readtext(np.array(input_image))
-         result_text = []
-         for text in result:
-             result_text.append(text[1])
-         st.write(result_text)
-         st.balloons()
- else:
-     st.write("Upload an Image")
+ import os
+ import time
+ from typing import Any
+
+ import requests
+ import streamlit as st
+ from langchain.chains import LLMChain
+ from langchain.chat_models import ChatOpenAI
+ from langchain.prompts import PromptTemplate
+ from transformers import pipeline
+ from utils import css_code
+
+ HUGGINGFACE_API_TOKEN = st.secrets["HUGGINGFACE_API_TOKEN"]
+ OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
+ MODEL = st.secrets["MODEL2"]
+
+
+ def progress_bar(amount_of_time: int) -> Any:
+     """
+     A very simple progress bar that increases over time,
+     then disappears when it reaches completion
+     :param amount_of_time: time taken
+     :return: None
+     """
+     progress_text = "Please wait, Generative models hard at work"
+     my_bar = st.progress(0, text=progress_text)
+
+     for percent_complete in range(amount_of_time):
+         time.sleep(0.04)
+         my_bar.progress(percent_complete + 1, text=progress_text)
+     time.sleep(1)
+     my_bar.empty()
+
+
+ def generate_text_from_image(url: str) -> str:
+     """
+     A function that uses the vit-gpt2 image-captioning model to generate text from an image.
+     :param url: image location
+     :return: text: generated text from the image
+     """
+     image_to_text: Any = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+
+     generated_text: str = image_to_text(url)[0]["generated_text"]
+
+     print(f"IMAGE INPUT: {url}")
+     print(f"GENERATED TEXT OUTPUT: {generated_text}")
+     return generated_text
+
+
+ def generate_story_from_text(scenario: str) -> str:
+     """
+     A function that uses a prompt template and GPT to generate a short story; LangChain is
+     used for chaining
+     :param scenario: generated text from the image
+     :return: generated story from the text
+     """
+     prompt_template: str = """
+     You are a story teller;
+     You can generate a long story based on a simple narrative, the story should be no more than 100 words and have more than 30 words;
+
+     CONTEXT: {scenario}
+     STORY:
+     """
+
+     prompt: PromptTemplate = PromptTemplate(template=prompt_template, input_variables=["scenario"])
+
+     llm: Any = ChatOpenAI(model_name=MODEL, temperature=1)
+
+     story_llm: Any = LLMChain(llm=llm, prompt=prompt, verbose=True)
+
+     generated_story: str = story_llm.predict(scenario=scenario)
+
+     print(f"TEXT INPUT: {scenario}")
+     print(f"GENERATED STORY OUTPUT: {generated_story}")
+     return generated_story
+
+
+ def generate_speech_from_text(message: str) -> Any:
+     """
+     A function using the ESPnet text-to-speech model from Hugging Face
+     :param message: short story generated by the GPT model
+     :return: generated audio from the short story
+     """
+     API_URL: str = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
+     headers: dict[str, str] = {"Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"}
+     payloads: dict[str, str] = {
+         "inputs": message
+     }
+     response: Any = requests.post(API_URL, headers=headers, json=payloads)
+     with open("generated_audio.flac", "wb") as file:
+         file.write(response.content)
+     st.download_button(
+         label="Download audio (FLAC) file",
+         data=response.content,
+         file_name='generated_audio.flac',
+         mime='flac',
+     )
+
+
+ def main() -> None:
+     """
+     Main function
+     :return: None
+     """
+     st.set_page_config(page_title="Image to audio story", page_icon="img/logo.png", layout="wide")
+
+     st.markdown(css_code, unsafe_allow_html=True)
+
+     with st.sidebar:
+         st.image("img/kandinsky.jpg")
+         # st.write("---")
+         st.title("Image to Story")
+
+     st.header("Generate audio story from an image")
+     uploaded_file: Any = st.file_uploader("Please choose a file to upload", type=["jpg", "png", "jpeg", "tif"])
+
+     if uploaded_file is not None:
+         print(uploaded_file)
+         bytes_data: Any = uploaded_file.getvalue()
+         with open(uploaded_file.name, "wb") as file:
+             file.write(bytes_data)
+         st.image(uploaded_file, caption="Uploaded Image",
+                  use_column_width=True)
+         progress_bar(100)
+         scenario: str = generate_text_from_image(uploaded_file.name)
+         story: str = generate_story_from_text(scenario)
+         # generate_speech_from_text(story)
+
+         with st.expander("Generated scenario"):
+             st.write(scenario)
+         with st.expander("Generated story"):
+             st.write(story)
+
+         # st.audio("generated_audio.flac")
+
+
+ if __name__ == "__main__":
+     main()
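
Note: the caption-to-story chain added in this commit can be exercised outside Streamlit. The sketch below is a minimal outline, not part of the commit, under these assumptions: the same langchain/transformers versions as app.py, OPENAI_API_KEY exported in the environment (the app itself reads it from .streamlit/secrets.toml via st.secrets), "gpt-3.5-turbo" standing in for the MODEL2 secret (its real value is not in the diff), and "local_image.jpg" as a hypothetical input file.

# Minimal sketch (not part of this commit) of the caption -> story flow in app.py.
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from transformers import pipeline

captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
scenario = captioner("local_image.jpg")[0]["generated_text"]  # caption the image

prompt = PromptTemplate(
    template="You are a story teller;\nCONTEXT: {scenario}\nSTORY:",
    input_variables=["scenario"],
)
chain = LLMChain(llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1), prompt=prompt)
print(chain.predict(scenario=scenario))  # the generated short story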
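
One caveat on the (currently commented-out) TTS helper: generate_speech_from_text writes response.content straight to disk, but when the Inference API errors out or is still loading the model, that content is a JSON error body rather than FLAC audio. A more defensive variant might look like the following sketch, which is not part of the commit:

# Sketch of a defensive variant of generate_speech_from_text (not in the commit):
# raise_for_status() surfaces HTTP errors instead of silently saving an error
# payload into generated_audio.flac.
import requests

API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"

def tts_bytes(message: str, token: str) -> bytes:
    response = requests.post(
        API_URL,
        headers={"Authorization": f"Bearer {token}"},
        json={"inputs": message},
    )
    response.raise_for_status()  # e.g. 503 while the model is loading
    return response.content  # raw FLAC bytes on success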